Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Helper that fetches an evaluation listing and begins parsing it.
# NOTE(review): this excerpt has lost its indentation and stops before the end
# of the function, so how `output_format` is used and what is returned cannot
# be seen here.
def __list_evaluations(api_call, output_format='object'):
"""Helper function to parse API calls which are lists of runs"""
# GET the listing and parse it; force_list asks xmltodict to always represent
# 'oml:evaluation' as a list.
xml_string = openml._api_calls._perform_api_call(api_call, 'get')
evals_dict = xmltodict.parse(xml_string, force_list=('oml:evaluation',))
# Minimalistic check if the XML is useful
if 'oml:evaluations' not in evals_dict:
raise ValueError('Error in return XML, does not contain '
'"oml:evaluations": %s' % str(evals_dict))
# NOTE(review): the assertion message reports the type of the parent
# 'oml:evaluations' node, not of the 'oml:evaluation' value being checked.
assert type(evals_dict['oml:evaluations']['oml:evaluation']) == list, \
type(evals_dict['oml:evaluations'])
# Presumably filled with the parsed evaluations further down (not visible here).
evals = collections.OrderedDict()
# Distinct uploader ids across all evaluations, so that the user details can be
# requested with a single API call.
uploader_ids = list(set([eval_['oml:uploader'] for eval_ in
evals_dict['oml:evaluations']['oml:evaluation']]))
# str.join requires the ids to be strings.
api_users = "user/list/user_id/" + ','.join(uploader_ids)
xml_string_user = openml._api_calls._perform_api_call(api_users, 'get')
users = xmltodict.parse(xml_string_user, force_list=('oml:user',))
# Lookup table from user id to username.
user_dict = {user['oml:id']: user['oml:username'] for user in users['oml:users']['oml:user']}
bool
True iff the deletion was successful. False otherwise
"""
# NOTE(review): the enclosing function header is not part of this excerpt and
# the indentation has been lost; `entity_type` and `entity_id` come from the
# part that is not visible.
# Only these entity types are accepted; anything else is rejected up front.
legal_entities = {
'data',
'flow',
'task',
'run',
'study',
'user',
}
if entity_type not in legal_entities:
raise ValueError('Can\'t delete a %s' % entity_type)
# Build "<entity_type>/<entity_id>"; the %d conversion requires a numeric id.
url_suffix = '%s/%d' % (entity_type, entity_id)
result_xml = openml._api_calls._perform_api_call(url_suffix,
'delete')
result = xmltodict.parse(result_xml)
# True iff the parsed reply has a top-level 'oml:<entity_type>_delete' key.
if 'oml:%s_delete' % entity_type in result:
return True
else:
return False
# NOTE(review): this excerpt has lost its indentation and is cut off in the
# middle of the last error message; the code that builds the returned list is
# not visible here.
def _get_estimation_procedure_list():
"""Return a list of all estimation procedures which are on OpenML.
Returns
-------
procedures : list
A list of all estimation procedures. Every procedure is represented by
a dictionary containing the following information: id, task type id,
name, type, repeats, folds, stratified.
"""
# GET the complete estimation procedure listing and parse the XML reply.
url_suffix = "estimationprocedure/list"
xml_string = openml._api_calls._perform_api_call(url_suffix,
'get')
procs_dict = xmltodict.parse(xml_string)
# Minimalistic check if the XML is useful
# Three conditions are checked in turn: the root element exists, it carries an
# '@xmlns:oml' attribute, and that attribute equals 'http://openml.org/openml'.
if 'oml:estimationprocedures' not in procs_dict:
raise ValueError('Error in return XML, does not contain tag '
'oml:estimationprocedures.')
elif '@xmlns:oml' not in procs_dict['oml:estimationprocedures']:
raise ValueError('Error in return XML, does not contain tag '
'@xmlns:oml as a child of oml:estimationprocedures.')
elif procs_dict['oml:estimationprocedures']['@xmlns:oml'] != \
'http://openml.org/openml':
raise ValueError('Error in return XML, value of '
'oml:estimationprocedures/@xmlns:oml is not '
'http://openml.org/openml, but %s' %
str(procs_dict['oml:estimationprocedures'][
# Helper that fetches a flow listing and begins converting it to plain dicts.
# NOTE(review): this excerpt has lost its indentation and is cut off inside the
# per-flow dict literal, so the use of `output_format` and the return statement
# are not visible here.
def __list_flows(
api_call: str,
output_format: str = 'dict'
) -> Union[Dict, pd.DataFrame]:
# GET the listing and parse it; force_list asks xmltodict to always represent
# 'oml:flow' as a list.
xml_string = openml._api_calls._perform_api_call(api_call, 'get')
flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow',))
# Minimalistic check if the XML is useful
# NOTE(review): these are assert statements, so they are skipped under
# `python -O`; the first message reports the type of the parent 'oml:flows'
# node rather than of the value being checked.
assert type(flows_dict['oml:flows']['oml:flow']) == list, \
type(flows_dict['oml:flows'])
assert flows_dict['oml:flows']['@xmlns:oml'] == \
'http://openml.org/openml', flows_dict['oml:flows']['@xmlns:oml']
# Presumably keyed by the integer flow id; the insertion is not visible here.
flows = dict()
for flow_ in flows_dict['oml:flows']['oml:flow']:
# The id is converted to int; the other fields are copied as parsed.
fid = int(flow_['oml:id'])
flow = {'id': fid,
'full_name': flow_['oml:full_name'],
'name': flow_['oml:name'],
'version': flow_['oml:version'],
'external_version': flow_['oml:external_version'],
study_id : int
OpenML id of the study
run_ids : list (int)
List of entities to link to the collection
Returns
-------
int
new size of the study (in terms of explicitly linked entities)
"""
# NOTE(review): the enclosing function header is not part of this excerpt;
# `study_id` and `run_ids` come from the part that is not visible.
# Interestingly, there's no need to tell the server about the entity type, it knows by itself
# The %d conversion requires a numeric study id.
uri = 'study/%d/attach' % study_id
# All ids are sent as one comma-separated string in the 'ids' POST field.
post_variables = {'ids': ','.join(str(x) for x in run_ids)}
result_xml = openml._api_calls._perform_api_call(uri, 'post', post_variables)
# The 'oml:linked_entities' value of the reply is returned as an int.
result = xmltodict.parse(result_xml)['oml:study_attach']
return int(result['oml:linked_entities'])
-------
dict
XML Dataset description parsed to a dict.
"""
# NOTE(review): the enclosing function header is not part of this excerpt and
# the indentation has been lost; `did_cache_dir` and `dataset_id` come from the
# part that is not visible.
# TODO implement a cache for this that invalidates itself after some time
# This can be saved on disk, but cannot be cached properly, because
# it contains the information on whether a dataset is active.
# Location where a freshly downloaded description is written.
description_file = os.path.join(did_cache_dir, "description.xml")
# Prefer the cached description; the code below the `except` line only runs
# when the cache lookup raised OpenMLCacheException.
try:
return _get_cached_dataset_description(dataset_id)
except OpenMLCacheException:
url_extension = "data/{}".format(dataset_id)
dataset_xml = openml._api_calls._perform_api_call(url_extension, 'get')
# Store the raw XML on disk (UTF-8) before parsing it.
with io.open(description_file, "w", encoding='utf8') as fh:
fh.write(dataset_xml)
# Only the content of the 'oml:data_set_description' root element is returned.
description = xmltodict.parse(dataset_xml)[
"oml:data_set_description"]
return description
Returns
-------
flow_exist : int or bool
flow id iff exists, False otherwise
Notes
-----
see http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
"""
if not (isinstance(name, str) and len(name) > 0):
raise ValueError('Argument \'name\' should be a non-empty string')
if not (isinstance(name, str) and len(external_version) > 0):
raise ValueError('Argument \'version\' should be a non-empty string')
xml_response = openml._api_calls._perform_api_call(
"flow/exists",
'post',
data={'name': name, 'external_version': external_version},
)
result_dict = xmltodict.parse(xml_response)
flow_id = int(result_dict['oml:flow_exists']['oml:id'])
if flow_id > 0:
return flow_id
else:
return False
# Helper that fetches a study listing and begins converting it to plain dicts.
# NOTE(review): this excerpt has lost its indentation and is cut off inside the
# `expected_fields` literal, so the use of `output_format` and the return
# statement are not visible here.
def __list_studies(api_call, output_format='object') -> Union[Dict, pd.DataFrame]:
# GET the listing and parse it; force_list asks xmltodict to always represent
# 'oml:study' as a list.
xml_string = openml._api_calls._perform_api_call(api_call, 'get')
study_dict = xmltodict.parse(xml_string, force_list=('oml:study',))
# Minimalistic check if the XML is useful
# NOTE(review): these are assert statements, so they are skipped under
# `python -O`; the first message reports the type of the parent
# 'oml:study_list' node rather than of the value being checked.
assert type(study_dict['oml:study_list']['oml:study']) == list, \
type(study_dict['oml:study_list'])
assert study_dict['oml:study_list']['@xmlns:oml'] == \
'http://openml.org/openml', study_dict['oml:study_list']['@xmlns:oml']
# Presumably filled with one entry per study; the insertion is not visible here.
studies = dict()
for study_ in study_dict['oml:study_list']['oml:study']:
# maps from xml name to a tuple of (dict name, casting fn)
expected_fields = {
'oml:id': ('id', int),
'oml:alias': ('alias', str),
'oml:main_entity_type': ('main_entity_type', str),
'oml:benchmark_suite': ('benchmark_suite', int),
def __list_datasets(api_call, output_format='dict'):
xml_string = openml._api_calls._perform_api_call(api_call, 'get')
datasets_dict = xmltodict.parse(xml_string, force_list=('oml:dataset',))
# Minimalistic check if the XML is useful
assert type(datasets_dict['oml:data']['oml:dataset']) == list, \
type(datasets_dict['oml:data'])
assert datasets_dict['oml:data']['@xmlns:oml'] == \
'http://openml.org/openml', datasets_dict['oml:data']['@xmlns:oml']
datasets = dict()
for dataset_ in datasets_dict['oml:data']['oml:dataset']:
ignore_attribute = ['oml:file_id', 'oml:quality']
dataset = {k.replace('oml:', ''): v
for (k, v) in dataset_.items()
if k not in ignore_attribute}
dataset['did'] = int(dataset['did'])
dataset['version'] = int(dataset['version'])