How to use the bigml.api.get_dataset_id function in bigml

To help you get started, we’ve selected a few bigml.api.get_dataset_id examples, based on popular ways it is used in public projects.

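Before the project snippets, here is a minimal sketch of the call itself. It is not taken from the examples below and assumes only the BigML Python bindings with credentials in BIGML_USERNAME / BIGML_API_KEY; the dataset id shown is a placeholder. In these bindings, get_dataset_id accepts either a "dataset/..." id string or a full dataset resource dict and returns the normalized dataset id; depending on the bindings version, input that is not a dataset resource either raises ValueError or returns None, which is why the snippets below guard with try/except or an is-not-None check.

import bigml.api

api = bigml.api.BigML()  # credentials taken from BIGML_USERNAME / BIGML_API_KEY

# From a plain id string (placeholder id shown here).
dataset_id = bigml.api.get_dataset_id("dataset/000000000000000000000000")

# From a full resource dict, such as the one api.get_dataset returns;
# with a real id this round-trips to the same "dataset/..." string.
dataset = api.get_dataset(dataset_id)
dataset_id = bigml.api.get_dataset_id(dataset)

# Input that is not a dataset resource: guard both ways, since some
# bindings versions raise ValueError and others return None.
try:
    dataset_id = bigml.api.get_dataset_id("not-a-dataset-id")
except ValueError:
    dataset_id = None
if dataset_id is None:
    print("not a dataset resource")

This is the same pattern the project code below relies on: normalize whatever id or resource a previous step handed in, then pass the clean id to calls such as api.check_resource or use it to build filenames and log entries.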

github bigmlcom / bigmler / bigmler / centroid.py View on Github
        message = u.dated("Batch centroid not found. Resuming.\n")
        resume, batch_centroid = c.checkpoint(
            c.is_batch_centroid_created, path, debug=args.debug,
            message=message, log_file=session_file, console=args.verbosity)
    if not resume:
        batch_centroid = create_batch_centroid(
            cluster_id, test_dataset, batch_centroid_args,
            args, api, session_file=session_file, path=path, log=log)
    if not args.no_csv:
        file_name = api.download_batch_centroid(batch_centroid,
                                                prediction_file)
        if file_name is None:
            sys.exit("Failed downloading CSV.")
    if args.to_dataset:
        batch_centroid = bigml.api.check_resource(batch_centroid, api=api)
        new_dataset = bigml.api.get_dataset_id(
            batch_centroid['object']['output_dataset_resource'])
        if new_dataset is not None:
            message = u.dated("Batch centroid dataset created: %s\n"
                              % u.get_url(new_dataset))
            u.log_message(message, log_file=session_file,
                          console=args.verbosity)
            u.log_created_resources("batch_centroid_dataset",
                                    path, new_dataset, mode='a')
github bigmlcom / bigmler / bigmler / dnprediction.py View on Github
        message = u.dated("Batch prediction not found. Resuming.\n")
        resume, batch_prediction = c.checkpoint(
            c.is_batch_prediction_created, path, debug=args.debug,
            message=message, log_file=session_file, console=args.verbosity)
    if not resume:
        batch_prediction = create_batch_prediction(
            deepnet_id, test_dataset, batch_prediction_args,
            args, api, session_file=session_file, path=path, log=log)
    if not args.no_csv:
        file_name = api.download_batch_prediction(batch_prediction,
                                                  prediction_file)
        if file_name is None:
            sys.exit("Failed downloading CSV.")
    if args.to_dataset:
        batch_prediction = bigml.api.check_resource(batch_prediction, api=api)
        new_dataset = bigml.api.get_dataset_id(
            batch_prediction['object']['output_dataset_resource'])
        if new_dataset is not None:
            message = u.dated("Batch prediction dataset created: %s\n"
                              % u.get_url(new_dataset))
            u.log_message(message, log_file=session_file,
                          console=args.verbosity)
            u.log_created_resources("batch_prediction_dataset",
                                    path, new_dataset, mode='a')
github bigmlcom / bigmler / bigmler / dispatcher.py View on Github
                    api, args, resume, session_file=session_file,
                    path=path, log=log)

                (test_source, resume, csv_properties,
                 test_fields) = test_properties
            else:
                test_source_id = bigml.api.get_source_id(args.test_source)
                test_source = api.check_resource(test_source_id)
            if test_dataset is None:
                # create test dataset from test source
                dataset_args = rds.set_basic_dataset_args(args, name=test_name)
                test_dataset, resume = pd.alternative_dataset_processing(
                    test_source, "test", dataset_args, api, args,
                    resume, session_file=session_file, path=path, log=log)
            else:
                test_dataset_id = bigml.api.get_dataset_id(test_dataset)
                test_dataset = api.check_resource(test_dataset_id)

            csv_properties.update(objective_field=None,
                                  objective_field_present=False)
            test_fields = pd.get_fields_structure(test_dataset,
                                                  csv_properties)

            if args.to_dataset and args.dataset_off:
                model = api.check_resource(model['resource'],
                                           query_string=r.ALL_FIELDS_QS)
                model_fields = Fields(model)
                objective_field_name = model_fields.field_name( \
                    model_fields.objective_field)
                if objective_field_name in test_fields.fields_by_name.keys():
                    args.prediction_name = "%s (predicted)" % \
                        objective_field_name
github bigmlcom / bigmler / bigmler / checkpoint.py View on Github
def is_dataset_created(path, suffix=""):
    """Checks existence and reads the dataset id from the dataset file in
       the path directory

    """
    dataset_id = None
    try:
        with open("%s%sdataset%s" % (path, os.sep, suffix)) as dataset_file:
            dataset_id = dataset_file.readline().strip()
            try:
                dataset_id = bigml.api.get_dataset_id(dataset_id)
                return True, dataset_id
            except ValueError:
                return False, None
    except IOError:
        return False, None
github bigmlcom / bigmler / bigmler / logisticregression / dispatcher.py View on Github
                test_properties = ps.test_source_processing(
                    api, args, resume, name=test_name,
                    session_file=session_file, path=path, log=log)
                (test_source, resume,
                 csv_properties, test_fields) = test_properties
            else:
                test_source_id = bigml.api.get_source_id(args.test_source)
                test_source = api.check_resource(test_source_id)
            if test_dataset is None:
                # create test dataset from test source
                dataset_args = set_basic_dataset_args(args, name=test_name)
                test_dataset, resume = pd.alternative_dataset_processing(
                    test_source, "test", dataset_args, api, args,
                    resume, session_file=session_file, path=path, log=log)
            else:
                test_dataset_id = bigml.api.get_dataset_id(test_dataset)
                test_dataset = api.check_resource(test_dataset_id)

            csv_properties.update(objective_field=None,
                                  objective_field_present=False)
            test_fields = pd.get_fields_structure(test_dataset,
                                                  csv_properties)
            batch_prediction_args = rbp.set_batch_prediction_args(
                args, fields=fields,
                dataset_fields=test_fields)

            remote_prediction(logistic_regression, test_dataset, \
                batch_prediction_args, args, \
                api, resume, prediction_file=output, \
                session_file=session_file, path=path, log=log)

        else:
github bigmlcom / bigmler / bigmler / analyze / k_fold_cv.py View on Github
    fields = None
    dataset = None
    datasets = []
    if args.dataset_file:
        # dataset is retrieved from the contents of the given local JSON file
        model_dataset, csv_properties, fields = u.read_local_resource(
            args.dataset_file,
            csv_properties=csv_properties)
        if not args.datasets:
            datasets = [model_dataset]
            dataset = model_dataset
        else:
            datasets = u.read_datasets(args.datasets)
        dataset_id = dataset['resource']
    elif args.dataset:
        dataset_id = bigml.api.get_dataset_id(args.dataset)
        datasets = [dataset_id]
    elif args.dataset_ids:
        datasets = args.dataset_ids
        dataset_id = datasets[0]

    if dataset_id:
        if not dataset:
            dataset = api.check_resource(dataset_id,
                                         query_string=ALL_FIELDS_QS)
        try:
            args.objective_field = int(args.objective_field)
        except (TypeError, ValueError):
            pass
        # if the user provided no objective field, try to use the one in the
        # dataset
        if args.objective_field is None:
github bigmlcom / bigmler / bigmler / processing / datasets.py View on Github
def csv_name(user_filename, path, dataset):
    """Building CSV exported dataset filename from the user-given value

    """
    if user_filename == '':
        dataset_id = bigml.api.get_dataset_id(dataset)
        if dataset_id:
            return os.path.join(path,
                                "%s.csv" % dataset_id.replace("/", "_"))
    return os.path.join(path, user_filename)
github bigmlcom / bigmler / bigmler / processing / datasets.py View on Github
        dataset_args = r.set_dataset_args(args, fields,
                                          multi_label_data=multi_label_data)
        dataset = r.create_dataset(source, dataset_args, args, api,
                                   path, session_file, log)

    # If a set of datasets is provided, let's check their ids.
    elif args.dataset_ids:
        for i in range(0, len(args.dataset_ids)):
            dataset_id = args.dataset_ids[i]
            if isinstance(dataset_id, dict) and "id" in dataset_id:
                dataset_id = dataset_id["id"]
            datasets.append(bigml.api.get_dataset_id(dataset_id))
        dataset = datasets[0]
    # If a dataset is provided, let's retrieve it.
    elif args.dataset:
        dataset = bigml.api.get_dataset_id(args.dataset)

    # If we already have a dataset, we check the status and get the fields if
    # we don't have them yet.
    if dataset:
        dataset = r.get_dataset(dataset, api, args.verbosity, session_file)

        if ('object' in dataset and 'objective_field' in dataset['object'] and
                'column_number' in dataset['object']['objective_field']):
            dataset_objective = dataset[
                'object']['objective_field']['column_number']
            csv_properties.update(objective_field=dataset_objective,
                                  objective_field_present=True)

        fields = get_fields_structure(dataset, csv_properties)

        if args.public_dataset:
github bigmlcom / bigmler / bigmler / deepnet / dispatcher.py View on Github
                test_properties = ps.test_source_processing(
                    api, args, resume, name=test_name,
                    session_file=session_file, path=path, log=log)
                (test_source, resume,
                 csv_properties, test_fields) = test_properties
            else:
                test_source_id = bigml.api.get_source_id(args.test_source)
                test_source = api.check_resource(test_source_id)
            if test_dataset is None:
                # create test dataset from test source
                dataset_args = set_basic_dataset_args(args, name=test_name)
                test_dataset, resume = pd.alternative_dataset_processing(
                    test_source, "test", dataset_args, api, args,
                    resume, session_file=session_file, path=path, log=log)
            else:
                test_dataset_id = bigml.api.get_dataset_id(test_dataset)
                test_dataset = api.check_resource(test_dataset_id)

            csv_properties.update(objective_field=None,
                                  objective_field_present=False)
            test_fields = pd.get_fields_structure(test_dataset,
                                                  csv_properties)
            batch_prediction_args = set_batch_prediction_args(
                args, fields=fields,
                dataset_fields=test_fields)

            remote_dn_prediction(deepnet, test_dataset, \
                batch_prediction_args, args, \
                api, resume, prediction_file=output, \
                session_file=session_file, path=path, log=log)

        else: