def test_evaluation_list_limit(self):
    openml.config.server = self.production_server
    evaluations = openml.evaluations.list_evaluations("predictive_accuracy",
                                                      size=100, offset=100)
    self.assertEqual(len(evaluations), 100)


def test_list_all_for_evaluations(self):
    required_size = 22
    # TODO: apparently the list_evaluations function does not support kwargs
    evaluations = openml.evaluations.list_evaluations(function='predictive_accuracy',
                                                      size=required_size)
    # might not be on the test server after a reset; rerun the test at least once if it fails
    self.assertEqual(len(evaluations), required_size)
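
# A minimal sketch (not part of the tests above) of how the size/offset
# parameters could be combined to page through a larger result set; the
# function name and the page size of 500 are illustrative assumptions.
def collect_evaluations(function="predictive_accuracy", pages=3, page_size=500):
    collected = {}
    for page in range(pages):
        batch = openml.evaluations.list_evaluations(
            function, size=page_size, offset=page * page_size)
        if not batch:
            break  # no further results on the server
        collected.update(batch)
    return collected
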
def test_evaluation_list_filter_uploader_ID_16(self):
    openml.config.server = self.production_server
    uploader_id = 16
    evaluations = openml.evaluations.list_evaluations("predictive_accuracy",
                                                      uploader=[uploader_id],
                                                      output_format='dataframe')
    self.assertEqual(evaluations['uploader'].unique(), [uploader_id])
    self.assertGreater(len(evaluations), 50)
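
# A hedged follow-up sketch: with output_format='dataframe' the listing is a
# pandas DataFrame, so the uploader's results can be summarised per flow.
# The helper name is hypothetical; 'flow_name' and 'value' are columns of the
# evaluation listing used elsewhere in these snippets.
def summarize_by_flow(evaluations_df):
    # mean predictive accuracy per flow, best first
    return (evaluations_df.groupby('flow_name')['value']
            .mean()
            .sort_values(ascending=False))
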
def test_evaluation_list_filter_run(self):
    openml.config.server = self.production_server
    run_id = 12
    evaluations = openml.evaluations.list_evaluations("predictive_accuracy",
                                                      run=[run_id])
    self.assertEqual(len(evaluations), 1)
    for run_id in evaluations.keys():
        self.assertEqual(evaluations[run_id].run_id, run_id)
        # default behaviour of this method: return aggregated results
        # (not per fold)
        self.assertIsNotNone(evaluations[run_id].value)
        self.assertIsNone(evaluations[run_id].values)
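
# A small sketch (not part of the test above): each key of the returned dict
# is a run id, so the full run behind an evaluation can be fetched with
# openml.runs.get_run for closer inspection. Run id 12 is reused from the
# test above and may not exist on every server.
evaluations = openml.evaluations.list_evaluations("predictive_accuracy", run=[12])
for run_id in evaluations:
    run = openml.runs.get_run(run_id)  # downloads the complete run description
    print(run.task_id, run.flow_id)
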
def test_evaluation_list_filter_uploader_ID_10(self):
    openml.config.server = self.production_server
    setup_id = 10
    evaluations = openml.evaluations.list_evaluations("predictive_accuracy",
                                                      setup=[setup_id])
    self.assertGreater(len(evaluations), 50)
    for run_id in evaluations.keys():
        self.assertEqual(evaluations[run_id].setup_id, setup_id)
        # default behaviour of this method: return aggregated results
        # (not per fold)
        self.assertIsNotNone(evaluations[run_id].value)
        self.assertIsNone(evaluations[run_id].values)

# Fragment of a per-fold listing test: the enclosing method definition and
# the value of `size` are truncated in this excerpt.
task_ids = [6]
uploader_ids = [1]
flow_ids = [6969]
evaluations = openml.evaluations.list_evaluations(
    "predictive_accuracy", size=size, offset=0, task=task_ids,
    flow=flow_ids, uploader=uploader_ids, per_fold=True)
self.assertEqual(len(evaluations), size)
for run_id in evaluations.keys():
    self.assertIsNone(evaluations[run_id].value)
    self.assertIsNotNone(evaluations[run_id].values)
    # potentially we could also test array values, but these might be
    # added in the future

evaluations = openml.evaluations.list_evaluations(
    "predictive_accuracy", size=size, offset=0, task=task_ids,
    flow=flow_ids, uploader=uploader_ids, per_fold=False)
for run_id in evaluations.keys():
    self.assertIsNotNone(evaluations[run_id].value)
    self.assertIsNone(evaluations[run_id].values)
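
# A hedged sketch (not from the original test) of how the per-fold `values`
# list returned with per_fold=True could be reduced to a mean and standard
# deviation per run using only the standard library.
import statistics

def per_fold_summary(evaluations):
    summary = {}
    for run_id, evaluation in evaluations.items():
        folds = evaluation.values  # list of per-fold scores when per_fold=True
        summary[run_id] = (statistics.mean(folds), statistics.pstdev(folds))
    return summary
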
def test_publish_study(self):
    # get some random runs to attach
    run_list = openml.evaluations.list_evaluations('predictive_accuracy', size=10)
    self.assertEqual(len(run_list), 10)
    fixt_alias = None
    fixt_name = 'unit tested study'
    fixt_descr = 'bla'
    fixt_flow_ids = set([evaluation.flow_id for evaluation in run_list.values()])
    fixt_task_ids = set([evaluation.task_id for evaluation in run_list.values()])
    fixt_setup_ids = set([evaluation.setup_id for evaluation in run_list.values()])
    study = openml.study.create_study(
        alias=fixt_alias,
        benchmark_suite=None,
        name=fixt_name,
        description=fixt_descr,
        run_ids=list(run_list.keys())
    )
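
# The test above stops after creating the study object locally. A hedged
# sketch of the follow-up step would be to upload it with publish(); the
# `study_id` attribute used below is an assumption about the returned object.
study.publish()
print('published study with id', study.study_id)
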
# Assumed imports for the two Dash helpers below (not shown in this excerpt):
# re, openml, plotly.figure_factory as ff, plotly.graph_objs as go,
# dash, dash_core_components as dcc, dash_html_components as html
def distplot_suite(pathname):
    suite_id = int(re.search(r'collections/tasks/(\d+)', pathname).group(1))
    suite = openml.study.get_suite(suite_id)
    all_scores = []
    glist = []
    for task_id in suite.tasks:
        evaluations = openml.evaluations.list_evaluations(
            task=[task_id], function='area_under_roc_curve',
            output_format='dataframe', size=10000)
        print("eval for task id", task_id)
        if len(evaluations) == 0:
            continue
        all_scores.append(evaluations)
        x = evaluations.value.values
        hist_data = [x]
        group_labels = [evaluations.data_name.iloc[1]]
        fig = ff.create_distplot(hist_data, group_labels, bin_size=0.05)
        graph = dcc.Graph(figure=fig)
        glist.append(html.Div(graph))
    return html.Div(glist)
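
# A hedged sketch of how a helper like distplot_suite might be wired into a
# Dash callback. The app object, the component ids ('url', 'distplot-container')
# and the layout are hypothetical and not part of the original snippet.
import dash

app = dash.Dash(__name__)
app.layout = html.Div([
    dcc.Location(id='url', refresh=False),
    html.Div(id='distplot-container'),
])

@app.callback(dash.dependencies.Output('distplot-container', 'children'),
              [dash.dependencies.Input('url', 'pathname')])
def update_distplot(pathname):
    return distplot_suite(pathname)
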
def scatterplot_study(pathname, value):
    print(value)
    study_id = int(re.search(r'collections/runs/(\d+)', pathname).group(1))
    study = openml.study.get_study(study_id)
    runs = study.runs[1:300]
    print(len(study.runs))
    item = openml.evaluations.list_evaluations('predictive_accuracy', run=runs,
                                               output_format='dataframe')
    item_fold = openml.evaluations.list_evaluations('predictive_accuracy', run=runs,
                                                    output_format='dataframe',
                                                    per_fold=True)
    if value == '0':
        # one point per run, using the aggregated score
        fig = go.Figure(data=go.Scatter(x=item['value'], y=item['data_name'],
                                        mode='markers'))
    else:
        # one point per fold, grouped into a trace per flow
        df = splitDataFrameList(item_fold, 'values')
        dfs = dict(tuple(df.groupby('flow_name')))
        key_list = list(dfs.keys())
        fig = go.Figure()
        for i in range(len(key_list)):
            curr_df = dfs[str(key_list[i])]
            fig.add_trace(go.Scatter(x=curr_df['values'], y=curr_df['data_name'],
                                     mode='markers', name=str(key_list[i])))
    graph = dcc.Graph(figure=fig)
    return html.Div(graph)
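
# splitDataFrameList is called above but not defined in this excerpt. A
# plausible (assumed) implementation explodes the list-valued 'values' column
# into one row per fold, which is the shape the per-fold scatter plot needs.
import pandas as pd

def splitDataFrameList(df, target_column):
    rows = []
    for _, row in df.iterrows():
        for v in row[target_column]:
            new_row = row.to_dict()
            new_row[target_column] = v
            rows.append(new_row)
    return pd.DataFrame(rows)
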
# License: BSD 3-Clause
import openml
############################################################################
# Listing evaluations
# *******************
# Evaluations can be retrieved from the database in the chosen output format.
# Filters can be applied to restrict the results to specific runs, tasks,
# flows or uploaders. We start by retrieving a small set (only 10 entries)
# to test the listing function for evaluations.
openml.evaluations.list_evaluations(function='predictive_accuracy', size=10,
                                    output_format='dataframe')
# Using other evaluation metrics, 'precision' in this case
evals = openml.evaluations.list_evaluations(function='precision', size=10,
                                            output_format='dataframe')
# Querying the returned results for precision above 0.98
print(evals[evals.value > 0.98])
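
############################################################################
# The same listing call also accepts task, flow and uploader filters. The call
# below is an illustrative sketch: task id 6 is reused from the test snippets
# above and may return few or no rows depending on the configured server.
evals_filtered = openml.evaluations.list_evaluations(function='predictive_accuracy',
                                                     task=[6], size=10,
                                                     output_format='dataframe')
print(evals_filtered.head())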
#############################################################################
# Viewing a sample task
# =====================
# Here we briefly take a look at the details of the task.
# We will start by displaying a simple *supervised classification* task:
task_id = 167140 # https://www.openml.org/t/167140
task = openml.tasks.get_task(task_id)
print(task)
#############################################################################
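# A hedged continuation sketch (the tutorial is truncated at this point):
# list the evaluations uploaded for the task shown above and inspect the
# best scores; the 'value' column holds the aggregated predictive accuracy.
task_evals = openml.evaluations.list_evaluations(function='predictive_accuracy',
                                                 task=[task_id], size=100,
                                                 output_format='dataframe')
print(task_evals.sort_values(by='value', ascending=False).head())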