How to use the openml.tasks.get_task function in openml

To help you get started, we've selected a few openml examples based on popular ways openml.tasks.get_task is used in public projects.
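Before the examples, here is a minimal sketch of the basic call. Task ID 31 (the credit-g classification task) is used purely as an illustration; any valid OpenML task ID works the same way.

import openml

# Download (and cache) the task definition from the OpenML server.
task = openml.tasks.get_task(31)  # 31 = credit-g, just an example ID
print(task.task_type, task.dataset_id)

# The task also carries its official train/test splits.
train_indices, test_indices = task.get_train_test_split_indices()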


github openml/openml-python/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
def test_openml_param_name_to_sklearn(self):
        scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
        boosting = sklearn.ensemble.AdaBoostClassifier(
            base_estimator=sklearn.tree.DecisionTreeClassifier())
        model = sklearn.pipeline.Pipeline(steps=[
            ('scaler', scaler), ('boosting', boosting)])
        flow = self.extension.model_to_flow(model)
        task = openml.tasks.get_task(115)
        run = openml.runs.run_flow_on_task(flow, task)
        run = run.publish()
        TestBase._mark_entity_for_removal('run', run.run_id)
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id))
        run = openml.runs.get_run(run.run_id)
        setup = openml.setups.get_setup(run.setup_id)

        # make sure to test enough parameters
        self.assertGreater(len(setup.parameters), 15)

        for parameter in setup.parameters.values():
            sklearn_name = self.extension._openml_param_name_to_sklearn(parameter, flow)

            # test the inverse. Currently, OpenML stores the hyperparameter
            # fullName as flow.name + flow.version + parameter.name on the
            # server (but this behaviour is not documented and might or might
github openml/openml-python/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
def test_run_model_on_fold_classification_2(self):
        task = openml.tasks.get_task(7)

        X, y = task.get_X_and_y()
        train_indices, test_indices = task.get_train_test_split_indices(
            repeat=0, fold=0, sample=0)
        X_train = X[train_indices]
        y_train = y[train_indices]
        X_test = X[test_indices]
        y_test = y[test_indices]

        pipeline = sklearn.model_selection.GridSearchCV(
            sklearn.tree.DecisionTreeClassifier(),
            {
                "max_depth": [1, 2],
            },
        )
        # TODO add some mocking here to actually test the innards of this function, too!
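The snippet above stops before the model is actually evaluated. Continuing with the same names (task 7, the GridSearchCV pipeline defined above), the manual split can be used directly with plain scikit-learn; a minimal sketch:

# Fit on the task's train fold and score on its test fold.
pipeline.fit(X_train, y_train)
accuracy = pipeline.score(X_test, y_test)  # mean accuracy on the held-out fold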
github openml/openml-python/tests/test_runs/test_run_functions.py
]

        def _remove_random_state(flow):
            if 'random_state' in flow.parameters:
                del flow.parameters['random_state']
            for component in flow.components.values():
                _remove_random_state(component)

        flow = self.extension.model_to_flow(clf)
        flow, _ = self._add_sentinel_to_flow_name(flow, sentinel)
        if not openml.flows.flow_exists(flow.name, flow.external_version):
            flow.publish()
            TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
            TestBase.logger.info("collected from test_run_functions: {}".format(flow.flow_id))

        task = openml.tasks.get_task(task_id)

        X, y = task.get_X_and_y()
        self.assertEqual(np.count_nonzero(np.isnan(X)), n_missing_vals)
        run = openml.runs.run_flow_on_task(
            flow=flow,
            task=task,
            seed=seed,
            avoid_duplicate_runs=openml.config.avoid_duplicate_runs,
        )
        run_ = run.publish()
        TestBase._mark_entity_for_removal('run', run.run_id)
        TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
        self.assertEqual(run_, run)
        self.assertIsInstance(run.dataset_id, int)

        # This is only a smoke check right now
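The publish-if-missing pattern used above can also be written standalone. A minimal sketch, assuming flow is any OpenMLFlow produced by an extension's model_to_flow; openml.flows.flow_exists returns the existing flow ID, or False if no matching flow is on the server:

flow_id = openml.flows.flow_exists(flow.name, flow.external_version)
if flow_id is False:
    flow = flow.publish()            # upload the new flow; assigns a server-side ID
    flow_id = flow.flow_id
else:
    flow = openml.flows.get_flow(flow_id)  # reuse the flow already on the server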
github openml/openml-python/tests/test_tasks/test_task_functions.py
def test__get_task(self):
        openml.config.cache_directory = self.static_cache_dir
        openml.tasks.get_task(1882)
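The test above points the client at a static cache directory; in normal use the same setting controls where downloaded tasks, datasets and splits are stored. A minimal sketch (the path is just an example):

import os
import openml

openml.config.cache_directory = os.path.expanduser("~/.openml/cache")
task = openml.tasks.get_task(1882)  # repeated calls are served from the cache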
github datamllab/autokaggle/examples/benchmarking.py
def get_dataset_splits(self, task_id):
        """ Get the train/test splits for the given task
        # Arguments
            task_id: Id of OpenML task flow
        # Returns
            Train/Test datasets in numpy array format
        """
        task = openml.tasks.get_task(task_id)
        train_indices, test_indices = task.get_train_test_split_indices()
        dataset = task.get_dataset()
        X, y, categorical_indicator, attribute_names = dataset.get_data(
            target=task.target_name, dataset_format='array')

        x_train, y_train = X[train_indices], y[train_indices]
        x_test, y_test = X[test_indices], y[test_indices]
        return x_train, y_train, x_test, y_test
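The helper above requests dataset_format='array'; recent openml-python versions also accept 'dataframe', which returns pandas objects and keeps the column names. A minimal sketch of that variant outside the class:

task = openml.tasks.get_task(task_id)  # task_id as in get_dataset_splits
dataset = task.get_dataset()
X, y, categorical_indicator, attribute_names = dataset.get_data(
    target=task.target_name, dataset_format='dataframe')  # X: DataFrame, y: Series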
github neurodata/SPORF/Python/benchmarks/benchmark_utils.py
n_iterations=10,
                  preprocess=False,
                  train_test_splits=None,
                  rerfs=['binnedBaseRerF'],
                  rerfs_kwargs=[None],
                  rerf_param_keyword='trees',
                  sklearns=['RandomForest'],
                  sklearns_kwargs=[None],
                  sklearn_param_keyword='n_estimators',
                  param_values=range(20, 41, 20),
                  return_predictions=False,
                  verbose=True,
                  acorn=None
    ):

    task = openml.tasks.get_task(oml_task_id)
    X, y = task.get_X_and_y()
    
    if len(rerfs) > len(rerfs_kwargs):
        if len(rerfs_kwargs) == 1:
            rerfs_kwargs = [rerfs_kwargs[0] for model in rerfs]
        else:
            raise ValueError('bad rerfs_kwargs')

    if len(sklearns) > len(sklearns_kwargs):
        if len(sklearns_kwargs) == 1:
            sklearns_kwargs = [sklearns_kwargs[0] for model in sklearns]
        else:
            raise ValueError('bad sklearns_kwargs')

    if preprocess:
        # TODO
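Since such a benchmark evaluates each configuration repeatedly, the task's own cross-validation splits can drive that loop instead of a single split. A minimal sketch, assuming get_split_dimensions is available in the installed openml version:

task = openml.tasks.get_task(oml_task_id)
X, y = task.get_X_and_y()
n_repeats, n_folds, n_samples = task.get_split_dimensions()
for fold in range(n_folds):
    train_idx, test_idx = task.get_train_test_split_indices(repeat=0, fold=fold)
    # fit and score each candidate model on X[train_idx] / X[test_idx] here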
github openml/openml-python/circle_drop/_downloads/tasks_tutorial.py
############################################################################
# Exercise
# ########
#
# Search for the tasks on the 'eeg-eye-state' dataset.

tasks.query('name=="eeg-eye-state"')

############################################################################
# Downloading tasks
# ^^^^^^^^^^^^^^^^^
#
# We provide two functions to download tasks, one which downloads only a single task by its ID, and one which takes a list of IDs and downloads all of these tasks:

task_id = 1
task = openml.tasks.get_task(task_id)

############################################################################
# Properties of the task are stored as member variables:

pprint(vars(task))

############################################################################
# And downloading multiple tasks at once:

ids = [1, 2, 19, 97, 403]
tasks = openml.tasks.get_tasks(ids)
pprint(tasks[0])
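The task IDs used above are hard-coded; in practice they usually come from a task listing. A minimal sketch, assuming the listing dataframe exposes the dataset name and a tid column as in recent openml-python versions:

tasks = openml.tasks.list_tasks(output_format='dataframe')
eeg_ids = tasks.query('name == "eeg-eye-state"')['tid'].tolist()
eeg_tasks = openml.tasks.get_tasks(eeg_ids)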
github openml/automlbenchmark/scripts/output_to_openml.py
:param benchmark: benchmark name containing allowed resources, e.g. 'medium-8c4h'
    :param framework: framework name
    :param task_id: openml task id
    :param predictions: mapping for fold->predictions file
    :return: an OpenML run connected between the right task and flow, and associated predictions.
    """
    cores, memory, time = parse_resource_parameters(benchmark)
    flow_id = amlb_flows[framework]

    parameters = [
        OrderedDict([('oml:name', 'cores'), ('oml:value', cores), ('oml:component', flow_id)]),
        OrderedDict([('oml:name', 'memory'), ('oml:value', memory), ('oml:component', flow_id)]),
        OrderedDict([('oml:name', 'time'), ('oml:value', time), ('oml:component', flow_id)]),
    ]

    task = openml.tasks.get_task(task_id)
    dataset_id = task.get_dataset().dataset_id

    benchmark_command = f'python3 runbenchmark.py {framework} {benchmark} -m aws -t {task_id}'

    predictions = load_format_predictions(task_id, predictions)

    return openml.runs.OpenMLRun(
        task_id=task_id, flow_id=flow_id, dataset_id=dataset_id,
        parameter_settings=parameters,
        setup_string=benchmark_command,
        data_content=predictions,
        tags=['study_218']
    )
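The OpenMLRun constructed above only exists locally; uploading it is a separate step. A minimal sketch (build_openml_run is a hypothetical name for the truncated function above), assuming openml.config.apikey is set for an account with upload rights:

run = build_openml_run(benchmark, framework, task_id, predictions)  # hypothetical name
run = run.publish()  # uploads data_content as the run's predictions
print(run.run_id)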