Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
task_id : int or str
The OpenML task id.
download_data : bool
Option to trigger download of data along with the meta data.
Returns
-------
task
"""
try:
task_id = int(task_id)
except (ValueError, TypeError):
raise ValueError("Dataset ID is neither an Integer nor can be "
"cast to an Integer.")
tid_cache_dir = openml.utils._create_cache_directory_for_id(
TASKS_CACHE_DIR_NAME, task_id,
)
try:
task = _get_task_description(task_id)
dataset = get_dataset(task.dataset_id, download_data)
# List of class labels available in dataset description
# Including class labels as part of task meta data handles
# the case where data download was initially disabled
if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
task.class_labels = \
dataset.retrieve_class_labels(task.target_name)
# Clustering tasks do not have class labels
# and do not offer download_split
if download_data:
if isinstance(task, OpenMLSupervisedTask):
Returns
-------
dataset : :class:`openml.OpenMLDataset`
The downloaded dataset.
"""
if isinstance(dataset_id, str):
try:
dataset_id = int(dataset_id)
except ValueError:
dataset_id = _name_to_id(dataset_id, version, error_if_multiple) # type: ignore
elif not isinstance(dataset_id, int):
raise TypeError("`dataset_id` must be one of `str` or `int`, not {}."
.format(type(dataset_id)))
did_cache_dir = _create_cache_directory_for_id(
DATASETS_CACHE_DIR_NAME, dataset_id,
)
try:
remove_dataset_cache = True
description = _get_dataset_description(did_cache_dir, dataset_id)
features = _get_dataset_features(did_cache_dir, dataset_id)
try:
qualities = _get_dataset_qualities(did_cache_dir, dataset_id)
except OpenMLServerException as e:
if e.code == 362 and str(e) == 'No qualities found - None':
logger.warning("No qualities found for dataset {}".format(dataset_id))
qualities = None
else:
raise
def _get_cached_dataset_description(dataset_id):
    """Load the locally cached XML description of a dataset.

    Parameters
    ----------
    dataset_id : int
        The OpenML dataset id whose cached ``description.xml`` is read.

    Returns
    -------
    dict
        The parsed ``oml:data_set_description`` section of the cached XML.

    Raises
    ------
    OpenMLCacheException
        If no description file is present in the cache for this id.
    """
    cache_dir = _create_cache_directory_for_id(
        DATASETS_CACHE_DIR_NAME, dataset_id,
    )
    description_path = os.path.join(cache_dir, "description.xml")
    try:
        with io.open(description_path, encoding='utf8') as xml_handle:
            raw_xml = xml_handle.read()
    except (IOError, OSError):
        raise OpenMLCacheException(
            "Dataset description for dataset id %d not "
            "cached" % dataset_id)
    return xmltodict.parse(raw_xml)["oml:data_set_description"]
Parameters
----------
flow_id : int
The OpenML flow id.
Returns
-------
OpenMLFlow
"""
try:
return _get_cached_flow(flow_id)
except OpenMLCacheException:
xml_file = os.path.join(
openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id),
"flow.xml",
)
flow_xml = openml._api_calls._perform_api_call("flow/%d" % flow_id, request_method='get')
with io.open(xml_file, "w", encoding='utf8') as fh:
fh.write(flow_xml)
return _create_flow_from_xml(flow_xml)
def _get_task_description(task_id):
    """Get the description of a task, from the local cache when possible.

    Falls back to downloading ``task.xml`` from the server and writing it
    into the task's cache directory when no cached copy exists.

    Parameters
    ----------
    task_id : int
        The OpenML task id.

    Returns
    -------
    task
        The task object built from the (cached or freshly downloaded) XML.
    """
    try:
        return _get_cached_task(task_id)
    except OpenMLCacheException:
        xml_file = os.path.join(
            openml.utils._create_cache_directory_for_id(
                TASKS_CACHE_DIR_NAME,
                task_id,
            ),
            "task.xml",
        )
        # Pass the request method by keyword, consistent with the flow
        # download path (``_perform_api_call(..., request_method='get')``).
        task_xml = openml._api_calls._perform_api_call(
            "task/%d" % task_id, request_method='get',
        )
        # Cache the raw XML so subsequent calls hit _get_cached_task.
        with io.open(xml_file, "w", encoding='utf8') as fh:
            fh.write(task_xml)
        return _create_task_from_xml(task_xml)
Parameters
----------
run_id : int
ignore_cache : bool
Whether to ignore the cache. If ``True`` this will download and overwrite the run xml
even if the requested run is already cached.
Returns
-------
run : OpenMLRun
Run corresponding to ID, fetched from the server.
"""
run_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME,
run_id)
run_file = os.path.join(run_dir, "description.xml")
if not os.path.exists(run_dir):
os.makedirs(run_dir)
try:
if not ignore_cache:
return _get_cached_run(run_id)
else:
raise OpenMLCacheException(message='dummy')
except OpenMLCacheException:
run_xml = openml._api_calls._perform_api_call("run/%d" % run_id, 'get')
with io.open(run_file, "w", encoding='utf8') as fh:
fh.write(run_xml)
output_filename : string
Location of ARFF file.
"""
if isinstance(description, dict):
md5_checksum_fixture = description.get("oml:md5_checksum")
url = description['oml:url']
did = description.get('oml:id')
elif isinstance(description, OpenMLDataset):
md5_checksum_fixture = description.md5_checksum
url = description.url
did = description.dataset_id
else:
raise TypeError("`description` should be either OpenMLDataset or Dict.")
if cache_directory is None:
cache_directory = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, did)
output_file_path = os.path.join(cache_directory, "dataset.arff")
try:
openml._api_calls._download_text_file(
source=url,
output_path=output_file_path,
md5_checksum=md5_checksum_fixture
)
except OpenMLHashException as e:
additional_info = " Raised when downloading dataset {}.".format(did)
e.args = (e.args[0] + additional_info,)
raise
return output_file_path
def _get_cached_flow(fid: int) -> OpenMLFlow:
    """Load the flow with the given id from the local cache.

    Parameters
    ----------
    fid : int
        Flow id.

    Returns
    -------
    OpenMLFlow

    Raises
    ------
    OpenMLCacheException
        If no ``flow.xml`` is cached for ``fid``; the flow's cache
        directory is removed before raising.
    """
    cache_dir = openml.utils._create_cache_directory_for_id(
        FLOWS_CACHE_DIR_NAME,
        fid,
    )
    cached_xml_path = os.path.join(cache_dir, "flow.xml")
    try:
        with io.open(cached_xml_path, encoding='utf8') as xml_handle:
            cached_flow = _create_flow_from_xml(xml_handle.read())
    except (OSError, IOError):
        # Drop the (possibly partial) cache entry so a retry re-downloads.
        openml.utils._remove_cache_dir_for_id(FLOWS_CACHE_DIR_NAME, cache_dir)
        raise OpenMLCacheException("Flow file for fid %d not "
                                   "cached" % fid)
    return cached_flow