def best_first_search(datasets_file, api, args, command_obj,
staleness=None, penalty=None, objective_name=None,
resume=False):
"""Selecting the fields to be used in the model construction
"""
counter = 0
loop_counter = 0
features_file = os.path.normpath(os.path.join(args.output_dir,
FEATURES_LOG))
features_writer = UnicodeWriter(features_file).open_writer()
features_header = FEATURES_HEADER
if staleness is None:
staleness = DEFAULT_STALENESS
if penalty is None:
penalty = DEFAULT_PENALTY
# retrieving the first dataset in the file
try:
with open(datasets_file, u.open_mode("r")) as datasets_handler:
dataset_id = datasets_handler.readline().strip()
    except IOError as exc:
sys.exit("Could not read the generated datasets file: %s" %
str(exc))
try:
stored_dataset = u.storage_file_name(args.output_dir, dataset_id)
with open(stored_dataset, u.open_mode("r")) as dataset_handler:
dataset = json.loads(dataset_handler.read())
"""Writes the final forecast to the required output
The function creates a new file per field used in the forecast input data.
The id of the field will be appended to the name provided in the `output`
parameter.
"""
for objective_id, forecast_value in forecast.items():
headers = [f["model"] for f in forecast_value]
points = []
if not forecast_value:
sys.exit("No forecasts available")
for index in range(len(forecast_value[0]["point_forecast"])):
points.append([f["point_forecast"][index] for f in forecast_value])
output_file = "%s_%s.csv" % (output, objective_id)
with UnicodeWriter(output_file, lineterminator="\n") as out_handler:
out_handler.writerow(headers)
for row in points:
out_handler.writerow(row)
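# A minimal, self-contained sketch of the per-objective CSV layout written
# above, using the standard csv module instead of UnicodeWriter. The
# `forecast` dict and its values are hypothetical sample data shaped the way
# the loop above expects them: one entry per objective field, each holding a
# list of per-model point forecasts.
import csv

def forecast_csv_example(output="my_forecast"):
    forecast = {"000001": [
        {"model": "model_a", "point_forecast": [68.5, 68.6, 68.7]},
        {"model": "model_b", "point_forecast": [68.4, 68.5, 68.6]}]}
    for objective_id, forecast_value in forecast.items():
        # one column per model, one row per forecast horizon step
        headers = [f["model"] for f in forecast_value]
        rows = zip(*[f["point_forecast"] for f in forecast_value])
        with open("%s_%s.csv" % (output, objective_id), "w") as handle:
            writer = csv.writer(handle, lineterminator="\n")
            writer.writerow(headers)
            writer.writerows(rows)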
def best_candidates_number(datasets_file, args, command_obj,
penalty=None,
resume=False):
"""Selecting the best number of random candidates
to be used in the ensemble construction
"""
loop_counter = 0
candidates_file = os.path.normpath(os.path.join(args.output_dir,
CANDIDATES_LOG))
candidates_writer = UnicodeWriter(candidates_file).open_writer()
candidates_writer.writerow(CANDIDATES_HEADER)
args.output_dir = os.path.normpath(os.path.join(args.output_dir,
"random"))
max_candidates = args.max_candidates + 1
if args.nodes_step is None:
args.nodes_step = DEFAULT_CANDIDATES_STEP
random_candidates = args.min_candidates
if penalty is None:
penalty = DEFAULT_CANDIDATES_PENALTY
best_score = - float('inf')
metric = args.optimize
score = best_score
best_counter = 0
while random_candidates < max_candidates:
prediction_file = UnicodeWriter(prediction_file).open_writer()
for model in models:
model = bigml.api.get_model_id(model)
predictions_file = get_predictions_file_name(model,
output_path)
predictions_files.append(predictions_file)
if (not resume or
not c.checkpoint(c.are_predictions_created, predictions_file,
test_reader.number_of_tests(),
debug=args.debug)[0]):
if not message_logged:
message = u.dated("Creating remote predictions.\n")
u.log_message(message, log_file=session_file,
console=args.verbosity)
message_logged = True
with UnicodeWriter(predictions_file) as predictions_file:
for input_data in raw_input_data_list:
input_data_dict = test_reader.dict(input_data)
prediction = api.create_prediction(model, input_data_dict,
args=prediction_args)
u.check_resource_error(prediction,
"Failed to create prediction: ")
u.log_message("%s\n" % prediction['resource'],
log_file=log)
prediction_row = prediction_to_row(prediction)
predictions_file.writerow(prediction_row)
if single_model:
write_prediction(prediction_row[0:2], prediction_file,
args.prediction_info,
input_data, exclude)
if single_model:
prediction_file.close_writer()
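# The snippets above drive UnicodeWriter in two ways: explicitly, through
# open_writer() / close_writer(), and as a context manager. A minimal sketch
# of both patterns follows; "example.csv" and the rows are hypothetical, and
# UnicodeWriter is assumed to be the CSV helper imported from the BigML
# Python bindings (bigml.io), as in the modules these excerpts come from.
def unicode_writer_patterns_example():
    # explicit open/close, as in the features and candidates writers above
    writer = UnicodeWriter("example.csv").open_writer()
    writer.writerow(["field", "value"])
    writer.close_writer()
    # context manager, as in the `with UnicodeWriter(...)` blocks above
    with UnicodeWriter("example.csv", lineterminator="\n") as handler:
        handler.writerow(["field", "value"])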
def topic_distribution(topic_models, fields, args, session_file=None):
"""Computes a topic distribution for each entry in the `test_set`.
"""
test_set = args.test_set
test_set_header = args.test_header
output = args.predictions
test_reader = TestReader(test_set, test_set_header, fields,
None,
test_separator=args.test_separator)
with UnicodeWriter(output, lineterminator="\n") as output:
# columns to exclude if input_data is added to the prediction field
exclude, headers = use_prediction_headers(
test_reader, fields, args)
        # Local topic distributions: Topic distributions are computed
        # locally using the topic models' method
message = u.dated("Creating local topic distributions.\n")
u.log_message(message, log_file=session_file, console=args.verbosity)
local_topic_distribution(topic_models, test_reader, output,
args, exclude=exclude, headers=headers)
test_reader.close()
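# Hedged driver sketch for topic_distribution() above (prediction() below
# follows the same calling pattern). The `args` object only needs the
# attributes read by the function and its helpers, so a plain Namespace is
# enough here. `topic_models` and `fields` are assumed to come from
# previously retrieved BigML resources, and the file names are hypothetical.
from argparse import Namespace

def topic_distribution_example(topic_models, fields):
    args = Namespace(test_set="test.csv", test_header=True,
                     predictions="topic_distributions.csv",
                     test_separator=",", verbosity=1)
    topic_distribution(topic_models, fields, args,
                       session_file="session_log.txt")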
def prediction(models, fields, args, session_file=None):
"""Computes a supervised model prediction
for each entry in the `test_set`.
"""
test_set = args.test_set
test_set_header = args.test_header
output = args.predictions
test_reader = TestReader(test_set, test_set_header, fields,
None,
test_separator=args.test_separator)
with UnicodeWriter(output, lineterminator="\n") as output:
# columns to exclude if input_data is added to the prediction field
exclude = use_prediction_headers(
args.prediction_header, output, test_reader, fields, args,
args.objective_field, quality="probability")
# Local predictions: Predictions are computed locally
message = u.dated("Creating local predictions.\n")
u.log_message(message, log_file=session_file, console=args.verbosity)
local_prediction(models, test_reader,
output, args, exclude=exclude)
test_reader.close()
def combine_votes(votes_files, to_prediction, to_file, method=0,
prediction_info=NORMAL_FORMAT, input_data_list=None,
exclude=None):
"""Combines the votes found in the votes' files and stores predictions.

    votes_files: list of the files that contain the votes
    to_prediction: the Model method that casts the prediction to a numeric
        type if needed
    to_file: name of the final output file
    """
votes = read_votes(votes_files, to_prediction)
u.check_dir(to_file)
with UnicodeWriter(to_file) as output:
number_of_tests = len(votes)
if input_data_list is None or len(input_data_list) != number_of_tests:
input_data_list = None
for index in range(0, number_of_tests):
multivote = votes[index]
input_data = (None if input_data_list is None
else input_data_list[index])
write_prediction(multivote.combine(method, full=True), output,
prediction_info, input_data, exclude)
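# Hedged usage sketch for combine_votes(): `votes_files` is assumed to be
# the list of per-model vote CSVs produced earlier, and `local_model` a
# local model object whose `to_prediction` method casts stored votes back to
# their numeric type, as the docstring above requires. The combiner code 0
# and the output file name are illustrative choices.
def combine_votes_example(votes_files, local_model):
    combine_votes(votes_files,
                  local_model.to_prediction,
                  "combined_predictions.csv",
                  method=0,
                  prediction_info=NORMAL_FORMAT)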
"""
prediction_args = {
"tags": args.tag,
"combiner": args.method
}
if output_path is None:
output_path = u.check_dir(prediction_file)
if (not resume or not c.checkpoint(
c.are_predictions_created, prediction_file,
test_reader.number_of_tests(), debug=args.debug)[0]):
message = u.dated("Creating remote predictions.")
u.log_message(message, log_file=session_file,
console=args.verbosity)
with UnicodeWriter(prediction_file) as predictions_file:
for input_data in test_reader:
input_data_dict = test_reader.dict(input_data)
prediction = api.create_prediction(ensemble_id,
input_data_dict,
wait_time=0,
args=prediction_args)
prediction = u.check_resource(prediction,
api.get_prediction)
u.check_resource_error(prediction,
"Failed to create prediction: ")
u.log_message("%s\n" % prediction['resource'], log_file=log)
prediction_row = prediction_to_row(prediction,
args.prediction_info)
write_prediction(prediction_row, predictions_file,
args.prediction_info, input_data, exclude)
headers_names.append("error")
for index in range(0, self._max_bins):
headers_names.append("bin%s_value" % index)
headers_names.append("bin%s_instances" % index)
else:
headers_names.append(
self.fields[self.tree.objective_id]['name'])
headers_names.append("confidence")
headers_names.append("impurity")
for category, _ in self.tree.distribution:
headers_names.append(category)
nodes_generator = self.get_nodes_info(headers_names,
leaves_only=leaves_only)
if file_name is not None:
with UnicodeWriter(file_name) as writer:
writer.writerow([header.encode("utf-8")
for header in headers_names])
for row in nodes_generator:
writer.writerow([item if not isinstance(item, basestring)
else item.encode("utf-8")
for item in row])
else:
rows = []
rows.append(headers_names)
for row in nodes_generator:
rows.append(row)
return rows
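# Hedged sketch of consuming the rows returned when no file_name is given.
# The excerpt above does not show the enclosing method's name; it is assumed
# here to be `tree_csv` on a local model object, and the printout simply
# pairs each node row with the generated headers.
def print_tree_nodes_example(local_model):
    rows = local_model.tree_csv(file_name=None, leaves_only=True)
    headers, node_rows = rows[0], rows[1:]
    for row in node_rows:
        print(dict(zip(headers, row)))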
def summary_csv(self, filename=None):
"""Summary of the contents of the fields
"""
summary = []
writer = None
if filename is not None:
writer = UnicodeWriter(filename,
quoting=csv.QUOTE_NONNUMERIC).open_writer()
writer.writerow(SUMMARY_HEADERS)
else:
summary.append(SUMMARY_HEADERS)
for field_column in self.fields_columns:
field_id = self.field_id(field_column)
field = self.fields.get(field_id)
field_summary = []
field_summary.append(field.get('column_number'))
field_summary.append(field_id)
field_summary.append(field.get('name'))
field_summary.append(field.get('label'))
field_summary.append(field.get('description'))
field_summary.append(field.get('optype'))
field_summary_value = field.get('summary', {})