def test_online_run_metric_score(self):
    openml.config.server = self.production_server

    # important to use a binary classification task, due to assertions
    run = openml.runs.get_run(9864498)
    self._test_local_evaluations(run)

# Fragment from another test helper; `seed`, `num_folds` and `task_type` come
# from its (truncated) signature, and the `else:` below pairs with an `if`
# that is not part of this snippet.
        try:
            model_prime = openml.runs.initialize_model_from_trace(
                run_id=run.run_id,
                repeat=0,
                fold=0,
            )
        except openml.exceptions.OpenMLServerException as e:
            e.message = "%s; run_id %d" % (e.message, run.run_id)
            raise e

        self._rerun_model_and_compare_predictions(run.run_id, model_prime,
                                                  seed, create_task_obj=True)
        self._rerun_model_and_compare_predictions(run.run_id, model_prime,
                                                  seed, create_task_obj=False)
    else:
        run_downloaded = openml.runs.get_run(run.run_id)
        sid = run_downloaded.setup_id
        model_prime = openml.setups.initialize_model(sid)
        self._rerun_model_and_compare_predictions(run.run_id, model_prime,
                                                  seed, create_task_obj=True)
        self._rerun_model_and_compare_predictions(run.run_id, model_prime,
                                                  seed, create_task_obj=False)

    # TODO: check if runtime is present
    self._check_fold_timing_evaluations(run.fold_evaluations, 1, num_folds,
                                        task_type=task_type)
    return run
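
# The helpers above compare locally recomputed scores against the server-side
# evaluations of a run. Below is a minimal, self-contained sketch of that idea;
# the metric choice and the tolerance are illustrative assumptions and are not
# taken from the test suite itself.
import numpy as np
import sklearn.metrics
import openml

run = openml.runs.get_run(9864498)  # the same production-server run as above
local_scores = run.get_metric_fn(sklearn.metrics.accuracy_score)  # one score per fold
server_score = run.evaluations.get('predictive_accuracy')  # server-side evaluation, if present
if server_score is not None:
    # illustrative tolerance; the real tests use their own assertion helpers
    assert abs(float(np.mean(local_scores)) - server_score) < 0.05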
def _wait_for_processed_run(self, run_id, max_waiting_time_seconds):
    # It can take a while for a run to be processed on the OpenML (test)
    # server, and sometimes a test needs to wait for that to happen. We wait
    # for at most max_waiting_time_seconds, probing the server every few
    # seconds so the test can continue as soon as evaluations appear.
    start_time = time.time()  # time.time() works in seconds
    while time.time() - start_time < max_waiting_time_seconds:
        run = openml.runs.get_run(run_id, ignore_cache=True)
        if len(run.evaluations) > 0:
            return
        time.sleep(3)
    raise RuntimeError('Could not find any evaluations! Please check whether run {} was '
                       'evaluated correctly on the server'.format(run_id))

# Fragment from an evaluation-listing test; the opening of the call that
# produces `evals_setups` (and the earlier `evals` listing) is truncated here,
# only its last keyword arguments remain.
        sort_order='desc',
        output_format='dataframe')

    # Check that the list is non-empty
    self.assertGreater(len(evals_setups), 0)
    # Check that the length matches the plain evaluation listing
    self.assertEqual(len(evals_setups), len(evals))
    # Check that the output is sorted in descending order of value
    self.assertSequenceEqual(sorted(evals_setups['value'].tolist(), reverse=True),
                             evals_setups['value'].tolist())
    # Check that the run order of list_evaluations is preserved
    self.assertSequenceEqual(evals_setups['run_id'].tolist(), evals['run_id'].tolist())
    # Check that the hyperparameter column matches each run's parameter settings
    for index, row in evals_setups.iterrows():
        params = openml.runs.get_run(row['run_id']).parameter_settings
        list1 = [param['oml:value'] for param in params]
        list2 = list(row['parameters'].values())
        # check that all values are equal
        self.assertSequenceEqual(sorted(list1), sorted(list2))
    return evals_setups
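
# A hedged sketch of the kind of listing call that produces a dataframe like
# `evals_setups` above. The function name list_evaluations_setups, the metric
# and the size are assumptions based on the openml-python evaluations API; only
# sort_order='desc' and output_format='dataframe' appear in the snippet itself.
import openml

evals_setups = openml.evaluations.list_evaluations_setups(
    function='predictive_accuracy',  # assumed evaluation measure
    size=100,                        # illustrative listing size
    sort_order='desc',
    output_format='dataframe',
)
evals = openml.evaluations.list_evaluations(
    function='predictive_accuracy',  # assumed evaluation measure
    size=100,
    sort_order='desc',
    output_format='dataframe',
)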
def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed,
                                         create_task_obj):
    run = openml.runs.get_run(run_id)
    # TODO: assert holdout task

    # download the predictions of the original run
    file_id = run.output_files['predictions']
    predictions_url = openml._api_calls._file_id_to_url(file_id)
    response = openml._api_calls._download_text_file(predictions_url)
    predictions = arff.loads(response)

    # if create_task_obj is False, the task argument of run_model_on_task is
    # given as a task id instead of a task object
    if create_task_obj:
        task = openml.tasks.get_task(run.task_id)
        run_prime = openml.runs.run_model_on_task(
            model=model_prime,
            task=task,
            avoid_duplicate_runs=False,
            # (the remaining arguments and the rest of this helper are
            # truncated in this snippet)
"bootstrap": [True, False],
"criterion": ["gini", "entropy"]},
cv=StratifiedKFold(n_splits=2, shuffle=True),
n_iter=2)
task = openml.tasks.get_task(11)
run = openml.runs.run_model_on_task(
model=randomsearch,
task=task,
avoid_duplicate_runs=False,
seed=1,
)
run_ = run.publish()
TestBase._mark_entity_for_removal('run', run.run_id)
TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
run = openml.runs.get_run(run_.run_id)
modelR = openml.runs.initialize_model_from_run(run_id=run.run_id)
modelS = openml.setups.initialize_model(setup_id=run.setup_id)
self.assertEqual(modelS.cv.random_state, 62501)
self.assertEqual(modelR.cv.random_state, 62501)
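
# Hedged reconstruction of the truncated RandomizedSearchCV set-up used in the
# fragment above. Only "bootstrap", "criterion", cv and n_iter appear in the
# snippet; the base estimator and the rest of the search space are assumptions
# added for illustration.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold

randomsearch = RandomizedSearchCV(
    RandomForestClassifier(n_estimators=5),       # assumed base estimator
    {"bootstrap": [True, False],                  # from the snippet
     "criterion": ["gini", "entropy"]},           # from the snippet
    cv=StratifiedKFold(n_splits=2, shuffle=True),
    n_iter=2,
)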
def get_layout_from_run(run_id):
    """Build the layout data for the run dashboard.

    :param run_id: id of the run
    :return: layout for the run dashboard
    """
    items = vars(runs.get_run(int(run_id)))
    # fold_evaluations maps measure -> repeat -> fold -> value
    ordered_dict = items['fold_evaluations']
    df = pd.DataFrame(ordered_dict.items(), columns=['evaluations', 'results'])
    result_list = []
    error = []
    for dic in df['results']:
        x = dic[0]  # repeat 0
        values = [x[elem] for elem in x]  # one value per fold
        mean = str(round(np.mean(np.array(values), axis=0), 3))
        std = str(round(np.std(np.array(values), axis=0), 3))
        result_list.append(values)
        error.append(mean + " \u00B1 " + std)
    df.drop(['results'], axis=1, inplace=True)
    df['results'] = result_list
    df['values'] = error
    d = df.drop(['results'], axis=1)
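
# For context, a minimal sketch of the structure the dashboard function above
# consumes: run.fold_evaluations maps measure -> repeat -> fold -> value. The
# run id is reused from the snippets above, and the mean/std rounding mirrors
# the function body; the sketch itself is illustrative, not part of the tests.
import numpy as np
import openml

run = openml.runs.get_run(9864498)
for measure, repeats in run.fold_evaluations.items():
    fold_values = list(repeats[0].values())  # repeat 0, one value per fold
    print(measure,
          round(float(np.mean(fold_values)), 3),
          "\u00B1",
          round(float(np.std(fold_values)), 3))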