flow.publish()
# Not collecting flow_id for deletion since this is a test for failed upload
self.assertEqual(api_call_mock.call_count, 1)
self.assertEqual(get_flow_mock.call_count, 1)
self.assertEqual(flow_exists_mock.call_count, 1)
flow_copy = copy.deepcopy(flow)
flow_copy.name = flow_copy.name[:-1]
get_flow_mock.return_value = flow_copy
flow_exists_mock.return_value = 1
with self.assertRaises(ValueError) as context_manager:
flow.publish()
TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
flow.flow_id))
fixture = (
"The flow on the server is inconsistent with the local flow. "
"The server flow ID is 1. Please check manually and remove "
"the flow if necessary! Error is:\n"
"'Flow sklearn.ensemble.forest.RandomForestClassifier: "
"values for attribute 'name' differ: "
"'sklearn.ensemble.forest.RandomForestClassifier'"
"\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'"
)
self.assertEqual(context_manager.exception.args[0], fixture)
self.assertEqual(get_flow_mock.call_count, 2)
def _existing_setup_exists(self, classif):
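# first publish the flow under a unique sentinel name so it is new to the test server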
flow = self.extension.model_to_flow(classif)
flow.name = 'TEST%s%s' % (get_sentinel(), flow.name)
flow.publish()
TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id))
# although the flow exists, we can be sure there are no
# setups (yet), as it hasn't been run
setup_id = openml.setups.setup_exists(flow)
self.assertFalse(setup_id)
# now run the flow on an easy task:
task = openml.tasks.get_task(115) # diabetes
run = openml.runs.run_flow_on_task(flow, task)
# spoof flow id, otherwise the sentinel is ignored
run.flow_id = flow.flow_id
run.publish()
TestBase._mark_entity_for_removal('run', run.run_id)
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id))
# download the run, as it contains the right setup id
run = openml.runs.get_run(run.run_id)
# execute the function we are interested in
setup_id = openml.setups.setup_exists(flow)
self.assertEqual(setup_id, run.setup_id)
def test_publish_existing_flow(self, flow_exists_mock):
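# flow_exists is mocked to report an existing flow, so publishing with
# raise_error_if_exists=True must raise a PyOpenMLError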
clf = sklearn.tree.DecisionTreeClassifier(max_depth=2)
flow = self.extension.model_to_flow(clf)
flow_exists_mock.return_value = 1
with self.assertRaises(openml.exceptions.PyOpenMLError) as context_manager:
flow.publish(raise_error_if_exists=True)
TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
flow.flow_id))
self.assertTrue('OpenMLFlow already exists' in context_manager.exception.message)
('estimator', sklearn.tree.DecisionTreeClassifier())
]
) # build a sklearn classifier
for task_id in benchmark_suite.tasks[:1]: # iterate over the first task only
task = openml.tasks.get_task(task_id) # download the OpenML task
X, y = task.get_X_and_y() # get the data (not used in this example)
openml.config.apikey = openml.config.apikey # the OpenML API key must already be set in the config
run = openml.runs.run_model_on_task(
clf, task, avoid_duplicate_runs=False
) # run classifier on splits (requires API key)
score = run.get_metric_fn(
sklearn.metrics.accuracy_score
) # compute the per-fold accuracy scores
print('Data set: %s; Accuracy: %0.2f' % (task.get_dataset().name, score.mean()))
run.publish() # publish the experiment on OpenML (optional)
TestBase._mark_entity_for_removal('run', run.run_id)
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
run.run_id))
print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))
['sklearn.model_selection._search.GridSearchCV',
'sklearn.pipeline.Pipeline',
'sklearn.linear_model.base.LinearRegression',
]
def _remove_random_state(flow):
if 'random_state' in flow.parameters:
del flow.parameters['random_state']
for component in flow.components.values():
_remove_random_state(component)
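# minimal usage sketch (flow_a and flow_b are hypothetical flows): strip the
# seeds from both flows before comparing them
#     _remove_random_state(flow_a)
#     _remove_random_state(flow_b)
#     openml.flows.assert_flows_equal(flow_a, flow_b)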
flow = self.extension.model_to_flow(clf)
flow, _ = self._add_sentinel_to_flow_name(flow, sentinel)
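# publish the flow only if no flow with this name and external version exists on the server yet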
if not openml.flows.flow_exists(flow.name, flow.external_version):
flow.publish()
TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
TestBase.logger.info("collected from test_run_functions: {}".format(flow.flow_id))
task = openml.tasks.get_task(task_id)
X, y = task.get_X_and_y()
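# sanity check: the downloaded data contains the expected number of missing values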
self.assertEqual(np.count_nonzero(np.isnan(X)), n_missing_vals)
run = openml.runs.run_flow_on_task(
flow=flow,
task=task,
seed=seed,
avoid_duplicate_runs=openml.config.avoid_duplicate_runs,
)
run_ = run.publish()
TestBase._mark_entity_for_removal('run', run.run_id)
TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
self.assertEqual(run_, run)
task = openml.tasks.get_task(115)
for clf in clfs:
try:
# first populate the server with this run.
# skip run if it was already performed.
run = openml.runs.run_model_on_task(
model=clf,
task=task,
seed=rs,
avoid_duplicate_runs=True,
upload_flow=True
)
run.publish()
TestBase._mark_entity_for_removal('run', run.run_id)
TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
except openml.exceptions.PyOpenMLError:
# run already existed. Great.
pass
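# the flow, a corresponding setup, and at least one run should now exist on the server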
flow = self.extension.model_to_flow(clf)
flow_exists = openml.flows.flow_exists(flow.name, flow.external_version)
self.assertGreater(flow_exists, 0)
# Do NOT use get_flow reinitialization: it can set hyperparameter
# values incorrectly. Use the local model instead.
downloaded_flow = openml.flows.get_flow(flow_exists)
downloaded_flow.model = clf
setup_exists = openml.setups.setup_exists(downloaded_flow)
self.assertGreater(setup_exists, 0)
run_ids = run_exists(task.task_id, setup_exists)
self.assertTrue(run_ids, msg=(run_ids, clf))
def test_initialize_model_from_run(self):
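# run a simple pipeline on a task, then rebuild the model both from the resulting
# run and from its setup, and check that all flows are equivalent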
clf = sklearn.pipeline.Pipeline(steps=[
('Imputer', SimpleImputer(strategy='median')),
('VarianceThreshold', VarianceThreshold(threshold=0.05)),
('Estimator', GaussianNB())])
task = openml.tasks.get_task(11)
run = openml.runs.run_model_on_task(
model=clf,
task=task,
avoid_duplicate_runs=False,
)
run_ = run.publish()
TestBase._mark_entity_for_removal('run', run_.run_id)
TestBase.logger.info("collected from test_run_functions: {}".format(run_.run_id))
run = openml.runs.get_run(run_.run_id)
modelR = openml.runs.initialize_model_from_run(run_id=run.run_id)
modelS = openml.setups.initialize_model(setup_id=run.setup_id)
flowR = self.extension.model_to_flow(modelR)
flowS = self.extension.model_to_flow(modelS)
flowL = self.extension.model_to_flow(clf)
openml.flows.assert_flows_equal(flowR, flowL)
openml.flows.assert_flows_equal(flowS, flowL)
self.assertEqual(flowS.components['Imputer'].
parameters['strategy'], '"median"')
self.assertEqual(flowS.components['VarianceThreshold'].
parameters['threshold'], '0.05')
upload_flow=False
)
cache_path = os.path.join(
self.workdir,
'runs',
str(random.getrandbits(128)),
)
run.to_filesystem(cache_path)
loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path)
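# publishing the reloaded run must fail: its flow_id is set although the
# flow was never uploaded to the server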
expected_message_regex = ("Flow does not exist on the server, "
"but 'flow.flow_id' is not None.")
with self.assertRaisesRegex(openml.exceptions.PyOpenMLError, expected_message_regex):
loaded_run.publish()
TestBase._mark_entity_for_removal('run', loaded_run.run_id)
TestBase.logger.info("collected from test_run_functions: {}".format(loaded_run.run_id))