message = u.dated("Batch centroid not found. Resuming.\n")
resume, batch_centroid = c.checkpoint(
    c.is_batch_centroid_created, path, debug=args.debug,
    message=message, log_file=session_file, console=args.verbosity)
if not resume:
    batch_centroid = create_batch_centroid(
        cluster_id, test_dataset, batch_centroid_args,
        args, api, session_file=session_file, path=path, log=log)
if not args.no_csv:
    file_name = api.download_batch_centroid(batch_centroid,
                                            prediction_file)
    if file_name is None:
        sys.exit("Failed downloading CSV.")
if args.to_dataset:
    batch_centroid = bigml.api.check_resource(batch_centroid, api=api)
    new_dataset = bigml.api.get_dataset_id(
        batch_centroid['object']['output_dataset_resource'])
    if new_dataset is not None:
        message = u.dated("Batch centroid dataset created: %s\n"
                          % u.get_url(new_dataset))
        u.log_message(message, log_file=session_file,
                      console=args.verbosity)
        u.log_created_resources("batch_centroid_dataset",
                                path, new_dataset, mode='a')
message = u.dated("Batch prediction not found. Resuming.\n")
resume, batch_prediction = c.checkpoint(
    c.is_batch_prediction_created, path, debug=args.debug,
    message=message, log_file=session_file, console=args.verbosity)
if not resume:
    batch_prediction = create_batch_prediction(
        deepnet_id, test_dataset, batch_prediction_args,
        args, api, session_file=session_file, path=path, log=log)
if not args.no_csv:
    file_name = api.download_batch_prediction(batch_prediction,
                                              prediction_file)
    if file_name is None:
        sys.exit("Failed downloading CSV.")
if args.to_dataset:
    batch_prediction = bigml.api.check_resource(batch_prediction, api=api)
    new_dataset = bigml.api.get_dataset_id(
        batch_prediction['object']['output_dataset_resource'])
    if new_dataset is not None:
        message = u.dated("Batch prediction dataset created: %s\n"
                          % u.get_url(new_dataset))
        u.log_message(message, log_file=session_file,
                      console=args.verbosity)
        u.log_created_resources("batch_prediction_dataset",
                                path, new_dataset, mode='a')
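# Both blocks above follow the same resume-or-create flow and lean on a couple
# of calls from the BigML Python bindings. A minimal, hedged sketch of those
# calls in isolation (the dataset id is a hypothetical placeholder; BigML()
# reads BIGML_USERNAME and BIGML_API_KEY from the environment):
from bigml.api import BigML, check_resource, get_dataset_id

api = BigML()
# get_dataset_id() normalizes its argument to the canonical "dataset/..." id.
dataset_id = get_dataset_id("dataset/5f2b6a2e1f386f3cfc01a2b4")
# check_resource() retrieves the resource and waits until it is finished.
dataset = check_resource(dataset_id, api=api)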
    test_properties = ps.test_source_processing(
        api, args, resume, session_file=session_file,
        path=path, log=log)
    (test_source, resume, csv_properties,
     test_fields) = test_properties
else:
    test_source_id = bigml.api.get_source_id(args.test_source)
    test_source = api.check_resource(test_source_id)
if test_dataset is None:
    # create test dataset from test source
    dataset_args = rds.set_basic_dataset_args(args, name=test_name)
    test_dataset, resume = pd.alternative_dataset_processing(
        test_source, "test", dataset_args, api, args,
        resume, session_file=session_file, path=path, log=log)
else:
    test_dataset_id = bigml.api.get_dataset_id(test_dataset)
    test_dataset = api.check_resource(test_dataset_id)
csv_properties.update(objective_field=None,
                      objective_field_present=False)
test_fields = pd.get_fields_structure(test_dataset,
                                      csv_properties)
if args.to_dataset and args.dataset_off:
    model = api.check_resource(model['resource'],
                               query_string=r.ALL_FIELDS_QS)
    model_fields = Fields(model)
    objective_field_name = model_fields.field_name(
        model_fields.objective_field)
    if objective_field_name in test_fields.fields_by_name:
        args.prediction_name = "%s (predicted)" % objective_field_name
def is_dataset_created(path, suffix=""):
    """Checks existence and reads the dataset id from the dataset file in
    the path directory
    """
    dataset_id = None
    try:
        with open("%s%sdataset%s" % (path, os.sep, suffix)) as dataset_file:
            dataset_id = dataset_file.readline().strip()
            try:
                dataset_id = bigml.api.get_dataset_id(dataset_id)
                return True, dataset_id
            except ValueError:
                return False, None
    except IOError:
        return False, None
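# A minimal usage sketch for the helper above (the directory and suffix are
# hypothetical; os and bigml.api must be imported, as in the rest of the
# module). It returns a (found, dataset_id) pair, so callers can decide
# whether to resume a previous run or start from scratch.
found, dataset_id = is_dataset_created("./last_run", suffix="_test")
if found:
    print("Resuming with %s" % dataset_id)
else:
    print("No reusable dataset found, creating a new one.")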
    test_properties = ps.test_source_processing(
        api, args, resume, name=test_name,
        session_file=session_file, path=path, log=log)
    (test_source, resume,
     csv_properties, test_fields) = test_properties
else:
    test_source_id = bigml.api.get_source_id(args.test_source)
    test_source = api.check_resource(test_source_id)
if test_dataset is None:
    # create test dataset from test source
    dataset_args = set_basic_dataset_args(args, name=test_name)
    test_dataset, resume = pd.alternative_dataset_processing(
        test_source, "test", dataset_args, api, args,
        resume, session_file=session_file, path=path, log=log)
else:
    test_dataset_id = bigml.api.get_dataset_id(test_dataset)
    test_dataset = api.check_resource(test_dataset_id)
csv_properties.update(objective_field=None,
                      objective_field_present=False)
test_fields = pd.get_fields_structure(test_dataset,
                                      csv_properties)
batch_prediction_args = rbp.set_batch_prediction_args(
    args, fields=fields,
    dataset_fields=test_fields)
remote_prediction(logistic_regression, test_dataset,
                  batch_prediction_args, args,
                  api, resume, prediction_file=output,
                  session_file=session_file, path=path, log=log)
else:
fields = None
dataset = None
dataset_id = None
datasets = []
if args.dataset_file:
    # dataset is retrieved from the contents of the given local JSON file
    model_dataset, csv_properties, fields = u.read_local_resource(
        args.dataset_file,
        csv_properties=csv_properties)
    if not args.datasets:
        datasets = [model_dataset]
        dataset = model_dataset
    else:
        datasets = u.read_datasets(args.datasets)
    dataset_id = dataset['resource']
elif args.dataset:
    dataset_id = bigml.api.get_dataset_id(args.dataset)
    datasets = [dataset_id]
elif args.dataset_ids:
    datasets = args.dataset_ids
    dataset_id = datasets[0]
if dataset_id:
    if not dataset:
        dataset = api.check_resource(dataset_id,
                                     query_string=ALL_FIELDS_QS)
    try:
        args.objective_field = int(args.objective_field)
    except (TypeError, ValueError):
        pass
    # if the user provided no objective field, try to use the one in the
    # dataset
    if args.objective_field is None:
def csv_name(user_filename, path, dataset):
    """Builds the exported CSV dataset filename from the user-given value
    """
    if user_filename == '':
        dataset_id = bigml.api.get_dataset_id(dataset)
        if dataset_id:
            return os.path.join(path,
                                "%s.csv" % dataset_id.replace("/", "_"))
    return os.path.join(path, user_filename)
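# A short usage sketch for csv_name() above (the directory and dataset id are
# hypothetical; os and bigml.api must be imported). An empty user filename
# falls back to a name derived from the dataset id, with "/" made
# filesystem-safe:
print(csv_name("", "exports", "dataset/5f2b6a2e1f386f3cfc01a2b4"))
# e.g. exports/dataset_5f2b6a2e1f386f3cfc01a2b4.csv
print(csv_name("my_dataset.csv", "exports", None))
# e.g. exports/my_dataset.csv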
    dataset_args = r.set_dataset_args(args, fields,
                                      multi_label_data=multi_label_data)
    dataset = r.create_dataset(source, dataset_args, args, api,
                               path, session_file, log)
# If a set of datasets is provided, let's check their ids.
elif args.dataset_ids:
    for dataset_id in args.dataset_ids:
        if isinstance(dataset_id, dict) and "id" in dataset_id:
            dataset_id = dataset_id["id"]
        datasets.append(bigml.api.get_dataset_id(dataset_id))
    dataset = datasets[0]
# If a dataset is provided, let's retrieve it.
elif args.dataset:
    dataset = bigml.api.get_dataset_id(args.dataset)
# If we already have a dataset, we check its status and get the fields if
# we don't have them yet.
if dataset:
    dataset = r.get_dataset(dataset, api, args.verbosity, session_file)
    if ('object' in dataset and 'objective_field' in dataset['object'] and
            'column_number' in dataset['object']['objective_field']):
        dataset_objective = dataset[
            'object']['objective_field']['column_number']
        csv_properties.update(objective_field=dataset_objective,
                              objective_field_present=True)
    fields = get_fields_structure(dataset, csv_properties)
    if args.public_dataset:
    test_properties = ps.test_source_processing(
        api, args, resume, name=test_name,
        session_file=session_file, path=path, log=log)
    (test_source, resume,
     csv_properties, test_fields) = test_properties
else:
    test_source_id = bigml.api.get_source_id(args.test_source)
    test_source = api.check_resource(test_source_id)
if test_dataset is None:
    # create test dataset from test source
    dataset_args = set_basic_dataset_args(args, name=test_name)
    test_dataset, resume = pd.alternative_dataset_processing(
        test_source, "test", dataset_args, api, args,
        resume, session_file=session_file, path=path, log=log)
else:
    test_dataset_id = bigml.api.get_dataset_id(test_dataset)
    test_dataset = api.check_resource(test_dataset_id)
csv_properties.update(objective_field=None,
                      objective_field_present=False)
test_fields = pd.get_fields_structure(test_dataset,
                                      csv_properties)
batch_prediction_args = set_batch_prediction_args(
    args, fields=fields,
    dataset_fields=test_fields)
remote_dn_prediction(deepnet, test_dataset,
                     batch_prediction_args, args,
                     api, resume, prediction_file=output,
                     session_file=session_file, path=path, log=log)
else: