How to use the openml.tasks module in openml

To help you get started, we've selected a few openml.tasks examples based on popular ways the library is used in public projects.

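Before diving into the project snippets below, here is a minimal sketch of the calls most of them build on. It assumes the openml package is installed and the public OpenML server is reachable; task ID 31 matches the tutorial excerpt further down.

import openml

# Download a single task by its ID and inspect a few of its attributes.
task = openml.tasks.get_task(31)
print(task.task_id, task.dataset_id, task.target_name)

# Fetch the underlying dataset and the feature/target arrays.
dataset = task.get_dataset()
X, y = task.get_X_and_y()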

github automl / auto-sklearn / test / test_pipeline / implementations / test_OneHotEncoder.py
def test_classification_workflow(self):
        task = openml.tasks.get_task(254)
        X, y = task.get_X_and_y()

        ohe = OneHotEncoder(categorical_features=[True]*22)
        tree = sklearn.tree.DecisionTreeClassifier(random_state=1)
        pipeline = sklearn.pipeline.Pipeline((('ohe', ohe), ('tree', tree)))

        X_train, X_test, y_train, y_test = \
            sklearn.cross_validation.train_test_split(X, y, random_state=3,
                                                      train_size=0.5,
                                                      test_size=0.5)
        pipeline.fit(X_train, y_train)
        self.assertEqual(np.mean(y_train == pipeline.predict(X_train)), 1)
        # With an incorrect copy operation the OneHotEncoder would rearrange
        # the data in such a way that the accuracy would drop to 66%
        self.assertEqual(np.mean(y_test == pipeline.predict(X_test)), 1)

github openml / openml-python / tests / test_utils / test_conditionalimputer.py
def test_impute_with_constant(self):
        task_ids = [2]

        for task_id in task_ids:
            task = openml.tasks.get_task(task_id)
            dataset = task.get_dataset()
            X, _ = dataset.get_data(target=task.target_name)
            nominal_indices = dataset.get_features_by_type('nominal', exclude=[task.target_name])
            fill_empty = -1
            clf = ConditionalImputer(strategy="median",
                                     strategy_nominal="most_frequent",
                                     categorical_features=None,
                                     verbose=True,
                                     fill_empty=fill_empty)

            self._do_test(dataset, X, nominal_indices, clf, fill_empty=fill_empty)

github openml / openml-python / tests / test_tasks / test_task_functions.py
def test__get_estimation_procedure_list(self):
        estimation_procedures = openml.tasks.functions.\
            _get_estimation_procedure_list()
        self.assertIsInstance(estimation_procedures, list)
        self.assertIsInstance(estimation_procedures[0], dict)
        self.assertEqual(estimation_procedures[0]['task_type_id'], 1)

github openml / openml-python / tests / test_utils / test_utils.py
def test_list_all(self):
        openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks)

github datamllab / autokaggle / examples / benchmarking.py
def get_dataset_ids(task_ids):
    """ Fetches the dataset_ids.
    # Arguments
        task_ids: List of ids of OpenML task flows
    # Returns
        dataset_list: List of the dataset Ids
    """
    if type(task_ids) == list:
        return [openml.tasks.get_task(t_id).dataset_id for t_id in task_ids]
    else:
        return openml.tasks.get_task(task_ids).dataset_id
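
# Hypothetical usage of the helper above (task IDs 31 and 2 are arbitrary
# examples, not taken from the original project):
dataset_ids = get_dataset_ids([31, 2])  # list in -> list of dataset IDs
single_id = get_dataset_ids(31)         # single ID in -> single dataset ID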

github openml / openml-python / openml / runs / functions.py
                    repeat = int(evaluation_dict['@repeat'])
                    fold = int(evaluation_dict['@fold'])
                    if key not in fold_evaluations:
                        fold_evaluations[key] = OrderedDict()
                    if repeat not in fold_evaluations[key]:
                        fold_evaluations[key][repeat] = OrderedDict()
                    fold_evaluations[key][repeat][fold] = value
                else:
                    evaluations[key] = value

    if 'description' not in files and from_server is True:
        raise ValueError('No description file for run %d in run '
                         'description XML' % run_id)

    if 'predictions' not in files and from_server is True:
        task = openml.tasks.get_task(task_id)
        if task.task_type_id == TaskTypeEnum.SUBGROUP_DISCOVERY:
            raise NotImplementedError(
                'Subgroup discovery tasks are not yet supported.'
            )
        else:
            # JvR: actually, I am not sure whether this error should be raised.
            # a run can consist without predictions. But for now let's keep it
            # Matthias: yes, it should stay as long as we do not really handle
            # this stuff
            raise ValueError('No prediction files for run %d in run '
                             'description XML' % run_id)

    tags = openml.utils.extract_xml_tags('oml:tag', run)

    return OpenMLRun(run_id=run_id, uploader=uploader,
                     uploader_name=uploader_name, task_id=task_id,

github openml / openml-python / openml / __init__.py
    Parameters
    ----------
    task_ids : iterable

    dataset_ids : iterable

    flow_ids : iterable

    run_ids : iterable

    Returns
    -------
    None
    """
    if task_ids is not None:
        for task_id in task_ids:
            tasks.functions.get_task(task_id)

    if dataset_ids is not None:
        for dataset_id in dataset_ids:
            datasets.functions.get_dataset(dataset_id)

    if flow_ids is not None:
        for flow_id in flow_ids:
            flows.functions.get_flow(flow_id)

    if run_ids is not None:
        for run_id in run_ids:
            runs.functions.get_run(run_id)

github openml / openml-python / master / _downloads / 9b437d19257bc26d354f0602a4fbddc1 / tasks_tutorial.py
# single task by its ID, and one which takes a list of IDs and downloads
# all of these tasks:

task_id = 31
task = openml.tasks.get_task(task_id)

############################################################################
# Properties of the task are stored as member variables:

print(task)

############################################################################
# And:

ids = [2, 1891, 31, 9983]
tasks = openml.tasks.get_tasks(ids)
print(tasks[0])

############################################################################
# Creating tasks
# ^^^^^^^^^^^^^^
#
# You can also create new tasks. Take the following into account:
#
# * You can only create tasks on _active_ datasets
# * For now, only the following tasks are supported: classification, regression,
# clustering, and learning curve analysis.
# * For now, tasks can only be created on a single dataset.
# * The exact same task must not already exist.
#
# Creating a task requires the following input:
#
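############################################################################
# A rough sketch of what the creation call can look like. The dataset ID,
# target name, and keyword arguments below are illustrative assumptions, and
# the exact argument names may differ between openml-python releases, so
# check the create_task documentation before using this.

from openml.tasks import TaskTypeEnum

my_task = openml.tasks.create_task(
    task_type_id=TaskTypeEnum.SUPERVISED_CLASSIFICATION,
    dataset_id=128,                            # must be an active dataset
    target_name="class",                       # target attribute of that dataset
    estimation_procedure_id=1,                 # e.g. 10-fold cross-validation
    evaluation_measure="predictive_accuracy",
)
my_task.publish()  # upload; rejected if the exact same task already exists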

github openml / openml-python / master / _downloads / 25a00d3d6385de3b0fbf8dd033ff9db0 / simple_suites_tutorial.py
print(suite)

####################################################################################################
# The benchmark suite does not download the included tasks and datasets itself, but only contains
# a list of which tasks constitute the study.
#
# Tasks can then be accessed via

tasks = suite.tasks
print(tasks)

####################################################################################################
# and iterated over for benchmarking. For speed reasons we only iterate over the first three tasks:

for task_id in tasks[:3]:
    task = openml.tasks.get_task(task_id)
    print(task)
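
####################################################################################################
# To actually benchmark an estimator on these tasks, each task can be passed to
# openml.runs.run_model_on_task. The classifier below is an arbitrary choice used only to sketch
# the loop; it is not part of the excerpt above.

from sklearn.tree import DecisionTreeClassifier

for task_id in tasks[:3]:
    task = openml.tasks.get_task(task_id)
    clf = DecisionTreeClassifier(random_state=1)
    run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False)
    print(run)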