def remote_prediction(model, test_dataset,
batch_prediction_args, args,
api, resume, prediction_file=None, session_file=None,
path=None, log=None):
"""Computes a prediction for each entry in the `test_set`.
Predictions are computed remotely using the batch prediction call.
"""
    model_id = bigml.api.get_resource_id(model)
    # if resuming, try to extract the batch prediction from the log files
if resume:
message = u.dated("Batch prediction not found. Resuming.\n")
resume, batch_prediction = c.checkpoint(
c.is_batch_prediction_created, path, debug=args.debug,
message=message, log_file=session_file, console=args.verbosity)
if not resume:
batch_prediction = create_batch_prediction(
model_id, test_dataset, batch_prediction_args,
args, api, session_file=session_file, path=path, log=log)
if not args.no_csv:
file_name = api.download_batch_prediction(batch_prediction,
prediction_file)
if file_name is None:
sys.exit("Failed downloading CSV.")
if test_dataset is None:
test_dataset = get_test_dataset(args)
    # Remote predictions: predictions are computed as batch predictions
    # in bigml.com, except when the --no-batch flag is set
if args.remote and not args.no_batch:
# create test source from file
test_name = "%s - test" % args.name
if args.test_source is None:
test_properties = ps.test_source_processing(
api, args, resume, name=test_name,
session_file=session_file, path=path, log=log)
(test_source, resume,
csv_properties, test_fields) = test_properties
else:
test_source_id = bigml.api.get_source_id(args.test_source)
test_source = api.check_resource(test_source_id)
if test_dataset is None:
# create test dataset from test source
dataset_args = set_basic_dataset_args(args, name=test_name)
test_dataset, resume = pd.alternative_dataset_processing(
test_source, "test", dataset_args, api, args,
resume, session_file=session_file, path=path, log=log)
else:
test_dataset_id = bigml.api.get_dataset_id(test_dataset)
test_dataset = api.check_resource(test_dataset_id)
csv_properties.update(objective_field=None,
objective_field_present=False)
test_fields = pd.get_fields_structure(test_dataset,
csv_properties)
        # the argument list below is assumed to mirror the analogous
        # batch topic distribution call further down
        batch_prediction_args = rbp.set_batch_prediction_args(
            args, fields=fields, dataset_fields=test_fields)
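
The block above delegates the creation of the remote test source and dataset to BigMLer's processing helpers. A minimal sketch of the equivalent calls with the bigml bindings, assuming a local "test.csv" file (the file and dataset names are placeholders):

# Sketch: uploading the test data and turning it into a dataset.
from bigml.api import BigML

api = BigML()
test_source = api.create_source("test.csv", {"name": "example - test"})
api.ok(test_source)                        # wait for the source to be parsed
test_dataset = api.create_dataset(test_source, {"name": "example - test"})
api.ok(test_dataset)
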
        if args.test_source is None:
            test_properties = ps.test_source_processing(
api, args, resume, name=test_name,
session_file=session_file, path=path, log=log)
(test_source, resume,
csv_properties, test_fields) = test_properties
else:
test_source_id = bigml.api.get_source_id(args.test_source)
test_source = api.check_resource(test_source_id)
if test_dataset is None:
# create test dataset from test source
dataset_args = set_basic_dataset_args(args, name=test_name)
test_dataset, resume = pd.alternative_dataset_processing(
test_source, "test", dataset_args, api, args,
resume, session_file=session_file, path=path, log=log)
else:
test_dataset_id = bigml.api.get_dataset_id(test_dataset)
test_dataset = api.check_resource(test_dataset_id)
test_fields = pd.get_fields_structure(test_dataset,
csv_properties)
        batch_topic_distribution_args = rtd.set_batch_topic_distribution_args(
            args, fields=fields, dataset_fields=test_fields)
        remote_topic_distribution(
            topic_model, test_dataset, batch_topic_distribution_args,
            args, api, resume, prediction_file=output,
            session_file=session_file, path=path, log=log)
else:
topic_distribution(topic_models, fields, args,
session_file=session_file)
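
The topic model branch mirrors the prediction branch: build the test dataset, then request a batch topic distribution. A hedged sketch of the underlying bindings calls, assuming the batch topic distribution API follows the batch prediction pattern (resource IDs and file name are placeholders):

# Sketch: a remote batch topic distribution with the raw bindings.
from bigml.api import BigML

api = BigML()
batch_topic_distribution = api.create_batch_topic_distribution(
    "topicmodel/5af06df94e17277501000002",   # placeholder topic model ID
    "dataset/5af06df94e17277501000001",      # placeholder test dataset ID
    {"name": "example - topic distribution"})
api.ok(batch_topic_distribution)
api.download_batch_topic_distribution(batch_topic_distribution,
                                      filename="topic_distributions.csv")
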
def create_samples(datasets, sample_ids, sample_args,
args, api=None, path=None,
session_file=None, log=None):
"""Create remote samples
"""
if api is None:
api = bigml.api.BigML()
samples = sample_ids[:]
existing_samples = len(samples)
sample_args_list = []
datasets = datasets[existing_samples:]
# if resuming and all samples were created, there will be no datasets left
if datasets:
if isinstance(sample_args, list):
sample_args_list = sample_args
# Only one sample per command, at present
number_of_samples = 1
max_parallel_samples = 1
message = dated("Creating %s.\n" %
plural("sample", number_of_samples))
        log_message(message, log_file=session_file,
                    console=args.verbosity)
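
create_samples above prepares the arguments and logging for remote sample creation; the underlying call per sample is a single bindings request. A minimal sketch (the dataset ID and name are placeholders):

# Sketch: creating one remote sample from an existing dataset.
from bigml.api import BigML

api = BigML()
sample = api.create_sample("dataset/5af06df94e17277501000001",
                           {"name": "example - sample"})
api.ok(sample)    # wait until the sample is ready
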
def delete(api, delete_list):
""" Deletes the resources given in the list.
"""
delete_functions = {bigml.api.SOURCE_RE: api.delete_source,
bigml.api.DATASET_RE: api.delete_dataset,
bigml.api.MODEL_RE: api.delete_model,
bigml.api.PREDICTION_RE: api.delete_prediction,
bigml.api.EVALUATION_RE: api.delete_evaluation,
bigml.api.ENSEMBLE_RE: api.delete_ensemble}
for resource_id in delete_list:
resource_type = None
        try:
            # probe each known resource type until the ID matches its regexp
            for resource_type in delete_functions:
                try:
                    bigml.api.get_resource(resource_type, resource_id)
                    break
                except ValueError:
                    pass
            delete_functions[resource_type](resource_id)
except ValueError:
console_log("Failed to delete resource %s" % resource_id)
args: dict for the rest of arguments
"""
self.action = action
self.origins = [resource_id] if origins is None and \
action == "update" else origins
self.args = args or {}
input_data = self.args.get("input_data")
if input_data:
del self.args["input_data"]
self.input_data = input_data
        self.resource_id = resource_id
        self.suffix = suffix
        # derive the resource type from the resource ID when it is not given
        if resource_id and not resource_type:
            self.resource_type = bigml.api.get_resource_type(self.resource_id)
        else:
            self.resource_type = resource_type
self.name = name
"""
try:
gazibit_tmp = GAZIBIT_SHARED if shared else GAZIBIT_PRIVATE
path = check_dir(os.path.join(output_dir,
REPORTS_DIR,
os.path.basename(gazibit_tmp)))
input_file = os.path.join(path, os.path.basename(gazibit_tmp))
output_file = tempfile.NamedTemporaryFile(
mode="w", dir=output_dir, delete=False)
if not os.path.isfile(input_file):
shutil.copyfile(gazibit_tmp, input_file)
with open(input_file, "r") as report_template:
with output_file as report_output:
content = report_template.read()
resource_type = bigml.api.get_resource_type(resource)
resource_type = resource_type.upper()
url_template = URL_TEMPLATE % resource_type
# For shared reports, use the embedded model tree
if shared and (resource_type in EMBEDDED_RESOURCES):
url = get_url(resource, embedded=True)
else:
url = get_url(resource, shared=shared)
content = content.replace(url_template, url)
section_template = SECTION_START % resource_type
content = content.replace(section_template, "")
section_template = SECTION_END % resource_type
content = content.replace(section_template, "")
report_output.write(content)
os.remove(input_file)
os.rename(output_file.name, input_file)
    except IOError as exc:
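
The report update above uses a write-then-swap pattern: the new content goes to a temporary file in the output directory, and only once it is fully written does it replace the original template. A self-contained sketch of that pattern (the helper name and file handling details are hypothetical):

# Sketch: rewriting a file in place via a temporary file and an atomic rename.
import os
import tempfile

def rewrite_in_place(path, transform):
    """Replaces `path` with transform(content), never leaving it half-written."""
    with open(path, "r") as source:
        content = source.read()
    tmp = tempfile.NamedTemporaryFile(
        mode="w", dir=os.path.dirname(path) or ".", delete=False)
    with tmp as output:
        output.write(transform(content))
    os.replace(tmp.name, path)    # atomic swap on the same filesystem
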