if test_dataset is None:
test_dataset = get_test_dataset(args)
# Remote projections: projections are computed as batch projections
# in bigml.com except when --no-batch flag is set on
if args.remote and not args.no_batch:
# create test source from file
test_name = "%s - test" % args.name
if args.test_source is None:
test_properties = ps.test_source_processing(
api, args, resume, name=test_name,
session_file=session_file, path=path, log=log)
(test_source, resume,
csv_properties, test_fields) = test_properties
else:
test_source_id = bigml.api.get_source_id(args.test_source)
test_source = api.check_resource(test_source_id)
if test_dataset is None:
# create test dataset from test source
dataset_args = set_basic_dataset_args(args, name=test_name)
test_dataset, resume = pd.alternative_dataset_processing(
test_source, "test", dataset_args, api, args,
resume, session_file=session_file, path=path, log=log)
else:
test_dataset_id = bigml.api.get_dataset_id(test_dataset)
test_dataset = api.check_resource(test_dataset_id)
csv_properties.update(objective_field=None,
objective_field_present=False)
test_fields = pd.get_fields_structure(test_dataset,
csv_properties)
batch_projection_args = set_batch_projection_args(
args, fields=fields,
dataset_fields=test_fields)
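# Hedged illustration (not bigmler source): the remote projection branch above
# ends up creating a batch projection through the BigML Python bindings. This
# minimal sketch assumes the bindings' create_batch_projection call, mirroring
# the other batch resources; resource ids and the options dict are placeholders.
from bigml.api import BigML

api = BigML()  # credentials taken from BIGML_USERNAME / BIGML_API_KEY
pca = api.check_resource("pca/000000000000000000000000")               # hypothetical id
test_dataset = api.check_resource("dataset/000000000000000000000001")  # hypothetical id
batch_projection = api.create_batch_projection(
    pca, test_dataset, {"all_fields": True, "header": True})
api.ok(batch_projection)  # wait until the batch projection finishes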
if test_dataset is None:
test_dataset = get_test_dataset(args)
# Remote predictions: predictions are computed as batch predictions
# in bigml.com except when --no-batch flag is set on
if args.remote and not args.no_batch:
# create test source from file
test_name = "%s - test" % args.name
if args.test_source is None:
test_properties = ps.test_source_processing(
api, args, resume, name=test_name,
session_file=session_file, path=path, log=log)
(test_source, resume,
csv_properties, test_fields) = test_properties
else:
test_source_id = bigml.api.get_source_id(args.test_source)
test_source = api.check_resource(test_source_id)
if test_dataset is None:
# create test dataset from test source
dataset_args = set_basic_dataset_args(args, name=test_name)
test_dataset, resume = pd.alternative_dataset_processing(
test_source, "test", dataset_args, api, args,
resume, session_file=session_file, path=path, log=log)
else:
test_dataset_id = bigml.api.get_dataset_id(test_dataset)
test_dataset = api.check_resource(test_dataset_id)
csv_properties.update(objective_field=None,
objective_field_present=False)
test_fields = pd.get_fields_structure(test_dataset,
csv_properties)
batch_prediction_args = set_batch_prediction_args(
args, fields=fields,
dataset_fields=test_fields)
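# Hedged end-to-end illustration (not bigmler source) of the remote batch
# prediction flow orchestrated above, written directly against the BigML
# Python bindings. File names, resource ids and option values are illustrative.
from bigml.api import BigML

api = BigML()
test_source = api.create_source("./test.csv", {"name": "my data - test"})
api.ok(test_source)                 # wait for the source to be parsed
test_dataset = api.create_dataset(test_source)
api.ok(test_dataset)
model = api.check_resource("model/000000000000000000000002")  # hypothetical id
batch_prediction = api.create_batch_prediction(
    model, test_dataset, {"all_fields": True, "header": True})
api.ok(batch_prediction)
api.download_batch_prediction(batch_prediction, filename="predictions.csv")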
# --test-datasets
if test_dataset is None:
test_dataset = get_test_dataset(args)
# Remote anomaly scores: scores are computed as batch anomaly scores
# in bigml.com except when --no-batch flag is set on
if args.remote and not args.no_batch:
# create test source from file
test_name = "%s - test" % args.name
if args.test_source is None:
test_properties = ps.test_source_processing(
api, args, resume, name=test_name,
session_file=session_file, path=path, log=log)
(test_source, resume,
csv_properties, test_fields) = test_properties
else:
test_source_id = bigml.api.get_source_id(args.test_source)
test_source = api.check_resource(test_source_id)
if test_dataset is None:
# create test dataset from test source
dataset_args = set_basic_dataset_args(args, name=test_name)
test_dataset, resume = pd.alternative_dataset_processing(
test_source, "test", dataset_args, api, args,
resume, session_file=session_file, path=path, log=log)
else:
test_dataset_id = bigml.api.get_dataset_id(test_dataset)
test_dataset = api.check_resource(test_dataset_id)
test_fields = pd.get_fields_structure(test_dataset,
csv_properties)
batch_anomaly_score_args = set_batch_anomaly_score_args(
args, fields=fields,
dataset_fields=test_fields)
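# Hedged illustration (not bigmler source): the anomaly branch above differs
# only in the batch resource it creates. Resource ids are placeholders and the
# download helper is assumed to mirror download_batch_prediction.
from bigml.api import BigML

api = BigML()
anomaly = api.check_resource("anomaly/000000000000000000000003")       # hypothetical id
test_dataset = api.check_resource("dataset/000000000000000000000004")  # hypothetical id
batch_anomaly_score = api.create_batch_anomaly_score(
    anomaly, test_dataset, {"all_fields": True, "header": True})
api.ok(batch_anomaly_score)
api.download_batch_anomaly_score(batch_anomaly_score,
                                 filename="anomaly_scores.csv")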
def is_source_created(path, suffix=""):
"""Checks existence and reads the source id from the source file in the
path directory
"""
source_id = None
try:
with open("%s%ssource%s" % (path, os.sep, suffix)) as source_file:
source_id = source_file.readline().strip()
try:
source_id = bigml.api.get_source_id(source_id)
return True, source_id
except ValueError:
return False, None
except IOError:
return False, None
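# Hedged usage sketch for the helper above: when resuming a command, bigmler-style
# code can probe the output directory for a previously created source and reuse
# its id instead of creating a new one. The directory name is illustrative; the
# helper itself relies on the os and bigml.api modules being imported.
import os          # needed by is_source_created for os.sep
import bigml.api   # needed by is_source_created for get_source_id

resume, source_id = is_source_created("./last_run_dir")
if resume:
    print("Reusing source %s" % source_id)
else:
    print("No reusable source found; a new one will be created.")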
all_labels = labels
if args.objective_field:
csv_properties.update({'objective_field': args.objective_field})
if args.source_file:
# source is retrieved from the contents of the given local JSON file
source, csv_properties, fields = u.read_local_resource(
args.source_file,
csv_properties=csv_properties)
else:
# source is retrieved from the remote object
source, resume, csv_properties, fields = ps.source_processing(
api, args, resume,
csv_properties=csv_properties, multi_label_data=multi_label_data,
session_file=session_file, path=path, log=log)
if source is not None:
args.source = bigml.api.get_source_id(source)
if args.multi_label and source:
multi_label_data = l.get_multi_label_data(source)
(args.objective_field,
labels,
all_labels,
multi_label_fields) = l.multi_label_sync(args.objective_field,
labels,
multi_label_data,
fields,
multi_label_fields)
if fields and args.export_fields:
fields.summary_csv(os.path.join(path, args.export_fields))
if args.dataset_file:
# dataset is retrieved from the contents of the given local JSON file
model_dataset, csv_properties, fields = u.read_local_resource(
args.dataset_file,
csv_properties=csv_properties)
if test_dataset is None:
test_dataset = get_test_dataset(args)
# Remote centroids: centroids are computed as batch centroids
# in bigml.com except when --no-batch flag is set on
if args.remote and not args.no_batch:
# create test source from file
test_name = "%s - test" % args.name
if args.test_source is None:
test_properties = ps.test_source_processing(
api, args, resume, name=test_name,
session_file=session_file, path=path, log=log)
(test_source, resume,
csv_properties, test_fields) = test_properties
else:
test_source_id = bigml.api.get_source_id(args.test_source)
test_source = api.check_resource(test_source_id)
if test_dataset is None:
# create test dataset from test source
dataset_args = set_basic_dataset_args(args, name=test_name)
test_dataset, resume = pd.alternative_dataset_processing(
test_source, "test", dataset_args, api, args,
resume, session_file=session_file, path=path, log=log)
else:
test_dataset_id = bigml.api.get_dataset_id(test_dataset)
test_dataset = api.check_resource(test_dataset_id)
test_fields = pd.get_fields_structure(test_dataset,
csv_properties)
batch_centroid_args = rbc.set_batch_centroid_args(
args, fields=fields,
dataset_fields=test_fields)
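# Hedged illustration (not bigmler source) of the centroid counterpart; a batch
# centroid can also materialize its output as a new dataset through the
# output_dataset option. Resource ids are placeholders.
from bigml.api import BigML

api = BigML()
cluster = api.check_resource("cluster/000000000000000000000005")       # hypothetical id
test_dataset = api.check_resource("dataset/000000000000000000000006")  # hypothetical id
batch_centroid = api.create_batch_centroid(
    cluster, test_dataset, {"all_fields": True, "output_dataset": True})
api.ok(batch_centroid)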
if test_dataset is None:
test_dataset = get_test_dataset(args)
# Remote topic distributions: topic distributions are computed as
# batch topic distributions in bigml.com except when --no-batch flag is set.
if args.remote and not args.no_batch:
# create test source from file
test_name = "%s - test" % args.name
if args.test_source is None:
test_properties = ps.test_source_processing(
api, args, resume, name=test_name,
session_file=session_file, path=path, log=log)
(test_source, resume,
csv_properties, test_fields) = test_properties
else:
test_source_id = bigml.api.get_source_id(args.test_source)
test_source = api.check_resource(test_source_id)
if test_dataset is None:
# create test dataset from test source
dataset_args = set_basic_dataset_args(args, name=test_name)
test_dataset, resume = pd.alternative_dataset_processing(
test_source, "test", dataset_args, api, args,
resume, session_file=session_file, path=path, log=log)
else:
test_dataset_id = bigml.api.get_dataset_id(test_dataset)
test_dataset = api.check_resource(test_dataset_id)
test_fields = pd.get_fields_structure(test_dataset,
csv_properties)
batch_topic_distribution_args = \
rtd.set_batch_topic_distribution_args(
args, fields=fields,
dataset_fields=test_fields)
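# Hedged illustration (not bigmler source) of the topic model counterpart,
# assuming the bindings expose create_batch_topic_distribution in the same way
# as the other batch calls. Resource ids are placeholders.
from bigml.api import BigML

api = BigML()
topic_model = api.check_resource("topicmodel/000000000000000000000007")  # hypothetical id
test_dataset = api.check_resource("dataset/000000000000000000000008")    # hypothetical id
batch_topic_distribution = api.create_batch_topic_distribution(
    topic_model, test_dataset, {"all_fields": True, "header": True})
api.ok(batch_topic_distribution)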
# we create a new dataset to test with.
data_set, data_set_header = r.data_to_source(args)
if data_set is not None:
# Check if there's a created project for it
args.project_id = pp.project_processing(
api, args, resume, session_file=session_file, path=path, log=log)
source_args = r.set_source_args(args,
multi_label_data=multi_label_data,
data_set_header=data_set_header)
source = r.create_source(data_set, source_args, args, api,
path, session_file, log)
# If a source is provided either through the command line or in resume
# steps, we use it.
elif args.source:
source = bigml.api.get_source_id(args.source)
# If we already have a source, we check that it is finished, extract the
# fields, and update them if needed.
if source:
source = r.get_source(source, api, args.verbosity, session_file)
if 'source_parser' in source['object']:
source_parser = source['object']['source_parser']
if 'missing_tokens' in source_parser:
csv_properties['missing_tokens'] = (
source_parser['missing_tokens'])
if 'locale' in source_parser:
csv_properties['data_locale'] = source_parser['locale']
# No changes if user locale is the one in the source.
if (args.user_locale is not None and
bigml_locale(args.user_locale) ==
source_parser['locale']):
    args.user_locale = None
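# Hedged illustration (not bigmler source) of the source metadata the block
# above reads: once a source is finished, its parsing settings (missing tokens,
# locale) are available under source['object']['source_parser']. The file name
# is illustrative.
from bigml.api import BigML

api = BigML()
source = api.create_source("./training_data.csv", {"name": "my data"})
api.ok(source)  # refreshes the dict in place once parsing finishes
source_parser = source['object'].get('source_parser', {})
print(source_parser.get('locale'), source_parser.get('missing_tokens'))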