        memory='32',
        cores='8'
    ),
    parameters_meta_info=OrderedDict(
        cores=OrderedDict(description='number of available cores', data_type='int'),
        memory=OrderedDict(description='memory in gigabytes', data_type='int'),
        time=OrderedDict(description='time in minutes', data_type='int'),
    ),
    language='English',
    tags=['amlb', 'benchmark', 'study_218'],
    dependencies='amlb==0.9',
    model=None
)
autosklearn_flow = openml.flows.get_flow(15275)  # auto-sklearn 0.5.1
autosklearn_amlb_flow = openml.flows.OpenMLFlow(
    name='automlbenchmark_autosklearn',
    description=('Auto-sklearn as set up by the AutoML Benchmark. '
                 'Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9'),
    components=OrderedDict(automl_tool=autosklearn_flow),
    **standard_kwargs
)
autosklearn_amlb_flow.publish()
print(f'autosklearn flow created: {autosklearn_amlb_flow.flow_id}')
# For dev purposes: since we rerun this often, double-check that no new flow gets created.
assert autosklearn_amlb_flow.flow_id == 15509, "! NEW FLOW CREATED UNEXPECTEDLY!"
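# Publishing an identical flow a second time resolves to the existing server-side
# entry, which is what the asserts in this script rely on. A minimal sketch of an
# explicit guard is shown below; it assumes the flow's external version is
# 'amlb==0.9' (only `dependencies` is visible in the truncated kwargs above, so
# treat that value as an assumption).
existing_id = openml.flows.flow_exists('automlbenchmark_autosklearn', 'amlb==0.9')
if existing_id:
    print(f'flow already on the server: {existing_id}')
else:
    autosklearn_amlb_flow.publish()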
tpot_flow = openml.flows.get_flow(15508)  # TPOT 0.9.6
tpot_amlb_flow = openml.flows.OpenMLFlow(
    name='automlbenchmark_tpot',
    description=('TPOT as set up by the AutoML Benchmark. '
                 'Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9'),
def is_subcomponent_specification(values):
    # Checks whether the given value can be a specification of subcomponents,
    # such as the value of the `steps` parameter (in Pipeline) or the
    # `transformers` parameter (in ColumnTransformer). These are always
    # lists/tuples of lists/tuples of length at least 2, with an OpenMLFlow
    # as the second item.
    if not isinstance(values, (tuple, list)):
        return False
    for item in values:
        if not isinstance(item, (tuple, list)):
            return False
        if len(item) < 2:
            return False
        if not isinstance(item[1], openml.flows.OpenMLFlow):
            return False
    return True
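# Illustrative usage sketch (the names and parameter values below are made up,
# not taken from the snippet above): a serialized Pipeline `steps` value is a
# list of (identifier, OpenMLFlow) pairs, which is exactly the shape this
# helper accepts.
import openml
from collections import OrderedDict

scaler_flow = openml.flows.OpenMLFlow(
    name='sklearn.preprocessing.StandardScaler',
    description='illustrative subflow',
    model=None,
    components=OrderedDict(),
    parameters=OrderedDict(),
    parameters_meta_info=OrderedDict(),
    external_version='sklearn==0.21.3',
    tags=[],
    language='English',
    dependencies='',
)

assert is_subcomponent_specification([('scaler', scaler_flow), ('clf', scaler_flow)])
assert not is_subcomponent_specification([('scaler', 'zscore')])  # second item is not a flow
assert not is_subcomponent_specification('passthrough')           # not a list/tuple at all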
trace_path = os.path.join(directory, 'trace.arff')
model_path = os.path.join(directory, 'model.pkl')

if not os.path.isfile(description_path):
    raise ValueError('Could not find description.xml')
if not os.path.isfile(predictions_path):
    raise ValueError('Could not find predictions.arff')
if not os.path.isfile(model_path) and expect_model:
    raise ValueError('Could not find model.pkl')

with open(description_path, 'r') as fht:
    xml_string = fht.read()
run = openml.runs.functions._create_run_from_xml(xml_string, from_server=False)

if run.flow_id is None:
    flow = openml.flows.OpenMLFlow.from_filesystem(directory)
    run.flow = flow
    run.flow_name = flow.name

with open(predictions_path, 'r') as fht:
    predictions = arff.load(fht)
run.data_content = predictions['data']

if os.path.isfile(model_path):
    # Note that the model is loaded whenever the file exists, even if
    # expect_model is False.
    with open(model_path, 'rb') as fhb:
        run.model = pickle.load(fhb)

if os.path.isfile(trace_path):
    run.trace = openml.runs.OpenMLRunTrace._from_filesystem(trace_path)
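# Usage sketch, assuming this fragment is the body of
# openml.runs.OpenMLRun.from_filesystem (as in openml-python); the directory
# name below is hypothetical. The loader expects description.xml and
# predictions.arff, plus optional model.pkl and trace.arff, in that directory.
import openml

run = openml.runs.OpenMLRun.from_filesystem('my_run_dir', expect_model=False)
print(run.flow_name, len(run.data_content))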
external_version = self._get_external_version_string(model, subcomponents)
dependencies = '\n'.join([
    self._format_external_version(
        'sklearn',
        sklearn.__version__,
    ),
    'numpy>=1.6.1',
    'scipy>=0.9',
])

sklearn_version = self._format_external_version('sklearn', sklearn.__version__)
sklearn_version_formatted = sklearn_version.replace('==', '_')

sklearn_description = self._get_sklearn_description(model)
flow = OpenMLFlow(name=name,
                  class_name=class_name,
                  custom_name=short_name,
                  description=sklearn_description,
                  model=model,
                  components=subcomponents,
                  parameters=parameters,
                  parameters_meta_info=parameters_meta_info,
                  external_version=external_version,
                  tags=['openml-python', 'sklearn', 'scikit-learn',
                        'python', sklearn_version_formatted,
                        # TODO: add more tags based on the scikit-learn
                        # module a flow is in? For example, automatically
                        # annotate a class of sklearn.svm.SVC() with the
                        # tag svm?
                        ],
                  extension=self,
def _get_rest_api_type_alias(oml_object: 'OpenMLBase') -> str:
    """ Return the alias of the openml entity as it is defined for the REST API. """
    rest_api_mapping = [
        (openml.datasets.OpenMLDataset, 'data'),
        (openml.flows.OpenMLFlow, 'flow'),
        (openml.tasks.OpenMLTask, 'task'),
        (openml.runs.OpenMLRun, 'run'),
        ((openml.study.OpenMLStudy, openml.study.OpenMLBenchmarkSuite), 'study'),
    ]  # type: List[Tuple[Union[Type, Tuple], str]]
    _, api_type_alias = [(python_type, api_alias)
                         for (python_type, api_alias) in rest_api_mapping
                         if isinstance(oml_object, python_type)][0]
    return api_type_alias
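# Usage sketch (illustrative ids): the returned alias is the prefix used in
# OpenML REST endpoints such as 'flow/{id}' or 'data/{id}'.
import openml

some_flow = openml.flows.get_flow(15509)        # flow id taken from the snippets above
some_dataset = openml.datasets.get_dataset(31)  # 'credit-g'; id chosen only for illustration

print(_get_rest_api_type_alias(some_flow))      # -> 'flow'
print(_get_rest_api_type_alias(some_dataset))   # -> 'data'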
sub_components_names += "," + sub_components[key].name

if sub_components_names:
    # slice operation on the string in order to get rid of the leading comma
    name = '%s(%s)' % (class_name, sub_components_names[1:])
else:
    name = class_name

# Get the external versions of all sub-components
external_version = _get_external_version_string(model, sub_components)

dependencies = [_format_external_version('sklearn', sklearn.__version__),
                'numpy>=1.6.1', 'scipy>=0.9']
dependencies = '\n'.join(dependencies)

flow = OpenMLFlow(name=name,
                  class_name=class_name,
                  description='Automatically created scikit-learn flow.',
                  model=model,
                  components=sub_components,
                  parameters=parameters,
                  parameters_meta_info=parameters_meta_info,
                  external_version=external_version,
                  tags=[],
                  language='English',
                  # TODO fill in dependencies!
                  dependencies=dependencies)

return flow
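# A hedged sketch of what this serialization yields for a small pipeline, using
# the public sklearn extension of openml-python (the exact entry point is an
# assumption; the fragment above only shows internal helpers).
import sklearn.pipeline
import sklearn.preprocessing
import sklearn.tree
from openml.extensions.sklearn import SklearnExtension

pipe = sklearn.pipeline.Pipeline(steps=[
    ('scaler', sklearn.preprocessing.StandardScaler()),
    ('clf', sklearn.tree.DecisionTreeClassifier()),
])
flow = SklearnExtension().model_to_flow(pipe)

# Per the name-building code above, flow.name combines the Pipeline class name
# with its sub-flow names in parentheses, and flow.dependencies is
# 'sklearn==<installed version>\nnumpy>=1.6.1\nscipy>=0.9'.
print(flow.name)
print(flow.dependencies)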
flow2 : OpenMLFlow
ignore_parameter_values_on_older_children : str (optional)
    If set to ``OpenMLFlow.upload_date``, ignores parameters in a child
    flow if its upload date predates the upload date of the parent flow.
ignore_parameter_values : bool
    Whether to ignore parameter values when comparing flows.
ignore_custom_name_if_none : bool
    Whether to ignore the custom name field if either flow has `custom_name`
    equal to `None`.
check_description : bool
    Whether to check the flow descriptions for equality.
"""
if not isinstance(flow1, OpenMLFlow):
    raise TypeError('Argument 1 must be of type OpenMLFlow, but is %s' %
                    type(flow1))
if not isinstance(flow2, OpenMLFlow):
    raise TypeError('Argument 2 must be of type OpenMLFlow, but is %s' %
                    type(flow2))

# TODO: as these are actually now saved during publish, it might be good to
# check for their equality as well.
generated_by_the_server = ['flow_id', 'uploader', 'version', 'upload_date',
                           # Tags aren't directly created by the server,
                           # but the uploader has no control over them!
                           'tags']
ignored_by_python_api = ['binary_url', 'binary_format', 'binary_md5',
                         'model', '_entity_id']

for key in set(flow1.__dict__.keys()).union(flow2.__dict__.keys()):
    if key in generated_by_the_server + ignored_by_python_api:
        continue
    attr1 = getattr(flow1, key, None)
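# Usage sketch, assuming this fragment belongs to openml.flows.assert_flows_equal
# (openml-python); the compared flows are taken from the snippets above and the
# keyword values are illustrative. The helper raises a ValueError on mismatch.
import openml
from openml.flows import assert_flows_equal

server_copy = openml.flows.get_flow(autosklearn_amlb_flow.flow_id)
assert_flows_equal(autosklearn_amlb_flow, server_copy,
                   ignore_parameter_values=True,
                   ignore_custom_name_if_none=True)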
print(f'h2o flow created: {h2o_amlb_flow.flow_id}')
assert h2o_amlb_flow.flow_id == 16115, "! NEW FLOW CREATED UNEXPECTEDLY!"

autoweka_amlb_flow = openml.flows.OpenMLFlow(
    name='automlbenchmark_autoweka',
    description=('Auto-WEKA 2.6 as set up by the AutoML Benchmark. '
                 'Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9'),
    components=OrderedDict(),
    **standard_kwargs
)
autoweka_amlb_flow.publish()
print(f'autoweka flow created: {autoweka_amlb_flow.flow_id}')
assert autoweka_amlb_flow.flow_id == 16116, "! NEW FLOW CREATED UNEXPECTEDLY!"

rf_flow = openml.flows.get_flow(16117)
rf_amlb_flow = openml.flows.OpenMLFlow(
    name='automlbenchmark_randomforest',
    description=('Random Forest baseline as set up by the AutoML Benchmark. '
                 'Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9'),
    components=OrderedDict(randomforest=rf_flow),
    **standard_kwargs
)
rf_amlb_flow.publish()
print(f'rf flow created: {rf_amlb_flow.flow_id}')
assert rf_amlb_flow.flow_id == 16118, "! NEW FLOW CREATED UNEXPECTEDLY!"

trf_amlb_flow = openml.flows.OpenMLFlow(
    name='automlbenchmark_tunedrandomforest',
    description=('Tuned Random Forest baseline as set up by the AutoML Benchmark. '
                 'Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9'),
    components=OrderedDict(randomforest=rf_flow),
    **standard_kwargs