        :param metrics: List of metrics to be computed.
        :param thread_count: Number of threads to use.
        :param evaluation_step: Step (in iterations) at which to evaluate the metrics.
        :return: Instance of EvaluationResult.
"""
cases_set = set(cases)
if len(cases_set) != len(cases):
raise CatBoostError("Found duplicate cases in " + cases)
current_wd = self.__go_to_working_dir()
try:
if self._fold_count <= self._fold_offset:
                error_msg = 'The number of evaluated folds (fold_count - fold_offset) must be at least one: fold_offset {}, fold_count {}.'
raise AttributeError(error_msg.format(self._fold_offset,
self._fold_count))
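            # FoldModelsHandler fits one model per (case, fold) pair; the
            # streaming reader feeds the dataset to the splitter, which cuts
            # it into folds for training and evaluation.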
handler = FoldModelsHandler(cases=cases,
metrics=metrics,
eval_step=evaluation_step,
thread_count=thread_count,
remove_models=self._remove_models)
reader = _SimpleStreamingFileReader(self._path_to_dataset,
sep=self._delimiter,
group_feature_num=self._group_feature_num)
splitter = _Splitter(reader,
self._column_description,
seed=self._seed,
min_folds_count=self._min_fold_count)
            result = handler.proceed(splitter=splitter,
                                     fold_size=self._fold_size,
                                     folds_count=self._fold_count,
                                     # fold_offset is an assumed completion, mirroring the offset check above
                                     fold_offset=self._fold_offset)
            return result
        finally:
            # Restore the caller's working directory.
            os.chdir(current_wd)
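    # A minimal usage sketch of the routine above. All names here are
    # assumptions for illustration (`CatboostEvaluation`, `ExecutionCase`,
    # the constructor arguments and the public method name); only `cases`,
    # `metrics`, `thread_count` and `evaluation_step` are documented above.
    #
    #   from catboost.eval.catboost_evaluation import CatboostEvaluation
    #   from catboost.eval.execution_case import ExecutionCase
    #
    #   evaluation = CatboostEvaluation('train.tsv',
    #                                   fold_size=5000,
    #                                   fold_count=5,
    #                                   column_description='train.cd')
    #   cases = [ExecutionCase({'iterations': 100}),
    #            ExecutionCase({'iterations': 100, 'depth': 8})]
    #   result = evaluation.eval_metrics(cases=cases,
    #                                    metrics=['Logloss'],
    #                                    thread_count=4,
    #                                    evaluation_step=10)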
        :param learn_files: FoldStorage entities with the learn folds used to fit the models.
        :return: Dictionary mapping each case to the list of models fitted on the learn folds.
"""
make_dirs_if_not_exists(FoldModelsHandler.__MODEL_DIR)
models = {}
for case in self._cases:
models[case] = list()
for file_num, learn_file in enumerate(learn_files):
pool = FoldModelsHandler._create_pool(learn_file, self._thread_count)
fold_id = fold_id_bias + file_num
for case in self._cases:
model_path = os.path.join(FoldModelsHandler.__MODEL_DIR,
FoldModelsHandler._create_model_name(case, fold_id))
get_eval_logger().debug("For model {} on fold #{} path is {}".format(str(case), fold_id, model_path))
fold_model = self._fit_model(pool, case, fold_id, model_path)
get_eval_logger().info("Model {} on fold #{} was fitted".format(str(case), fold_id))
models[case].append(fold_model)
return models
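        # One metrics calcer is created per trained model below; every calcer
        # must report the same metric descriptions, otherwise the per-metric
        # results could not be aggregated across folds.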
for case, case_models in grouped_by_case_models.items():
metric_calcers[case] = list()
for case_model in case_models:
metric_calcer = case_model.create_metrics_calcer(metrics,
eval_step=self._eval_step,
thread_count=self._thread_count)
metric_calcers[case].append(metric_calcer)
if self._metric_descriptions is None:
self._init_case_results(metric_calcer.metric_descriptions())
elif self._metric_descriptions != metric_calcer.metric_descriptions():
raise CatBoostError("Error: metric names should be consistent")
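        # Cross-evaluation: each fold file is scored with every model except
        # the one trained on it (file_num == model_num), so models are only
        # evaluated on folds they have not seen during training.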
for file_num, fold_file in enumerate(learn_folds + skipped_folds + rest_folds):
pool = FoldModelsHandler._create_pool(fold_file, self._thread_count)
for case, case_models in grouped_by_case_models.items():
calcers = metric_calcers[case]
for model_num, model in enumerate(case_models):
if file_num != model_num:
calcers[model_num].add(pool)
for case, case_models in grouped_by_case_models.items():
calcers = metric_calcers[case]
case_results = self._case_results[case]
for calcer, model in zip(calcers, case_models):
scores = calcer.eval_metrics()
for metric in self._metric_descriptions:
case_results[metric]._add(model, scores.get_result(metric))
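    # Best-effort cleanup of the model directory: os.rmdir succeeds only when
    # the directory is already empty; a failure is logged as a warning rather
    # than raised.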
    @staticmethod
    def _remove_model_dir():
try:
if os.path.exists(FoldModelsHandler.__MODEL_DIR):
os.rmdir(FoldModelsHandler.__MODEL_DIR)
except OSError as err:
get_eval_logger().warning(str(err))