# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _linear_kernel(self, support_vector):
    """Build the AST for a linear kernel applied to one support vector.

    Produces the dot product of ``support_vector`` with the feature
    vector: sum_i(support_vector[i] * feature[i]).
    """
    products = [
        utils.mul(ast.NumVal(coef), ast.FeatureRef(idx))
        for idx, coef in enumerate(support_vector)
    ]
    return utils.apply_op_to_expressions(ast.BinNumOpType.ADD, *products)
def _assemble_single_output(self, trees, base_score=0):
    """Assemble one scalar output: base_score plus the sum of all tree
    outputs, with the final transform applied on top.
    """
    if self._tree_limit:
        trees = trees[:self._tree_limit]

    assembled = [self._assemble_tree(t) for t in trees]
    summands = assembled

    # Large ensembles must be split into multiple subroutines to stay
    # under per-method size limits of some target languages (java);
    # see https://github.com/BayesWitnesses/m2cgen/issues/103.
    leaf_counts = [self._count_leaves(t) for t in trees]
    if sum(leaf_counts) > self._leaves_cutoff_threshold:
        summands = self._split_into_subroutines(assembled, leaf_counts)

    total = utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(base_score),
        *summands)

    return ast.SubroutineExpr(self._final_transform(total))
def _split_into_subroutines(self, trees_ast, trees_num_leaves):
    """Group tree ASTs into subroutine expressions.

    Greedily packs consecutive trees into one subroutine until adding
    the next tree would push the accumulated leaf count past
    ``self._leaves_cutoff_threshold``; then the current group is
    finalized and a new one is started. A group always holds at least
    one tree, so a single oversized tree still gets its own subroutine.

    :param trees_ast: list of assembled tree expressions.
    :param trees_num_leaves: per-tree leaf counts, parallel to
        ``trees_ast``.
    :return: list of ``ast.SubroutineExpr``, each summing one group.
    """
    result = []
    subroutine_trees = []
    subroutine_sum_leaves = 0

    for tree, num_leaves in zip(trees_ast, trees_num_leaves):
        next_sum = subroutine_sum_leaves + num_leaves
        if subroutine_trees and next_sum > self._leaves_cutoff_threshold:
            # Exceeded the max leaves in the current subroutine,
            # finalize this one and start a new one.
            partial_result = utils.apply_op_to_expressions(
                ast.BinNumOpType.ADD,
                *subroutine_trees)
            result.append(ast.SubroutineExpr(partial_result))
            subroutine_trees = []
            subroutine_sum_leaves = 0
        subroutine_sum_leaves += num_leaves
        subroutine_trees.append(tree)

    # Flush the trailing, not-yet-finalized group.
    if subroutine_trees:
        partial_result = utils.apply_op_to_expressions(
            ast.BinNumOpType.ADD,
            *subroutine_trees)
        result.append(ast.SubroutineExpr(partial_result))

    # BUG FIX: the original fell off the end and implicitly returned
    # None; the caller assigns this return value and unpacks it with
    # `*`, which requires the actual list.
    return result
def _assemble_bin_class_output(self, trees):
    """Assemble a two-element probability vector for binary
    classification: [P(class 0), P(class 1)].
    """
    # Base score is calculated based on https://github.com/dmlc/xgboost/blob/master/src/objective/regression_loss.h#L64 # noqa
    # return -logf(1.0f / base_score - 1.0f);
    if self._base_score == 0:
        margin = 0
    else:
        margin = -np.log(1.0 / self._base_score - 1.0)

    raw_expr = self._assemble_single_output(trees, margin)
    positive_proba = utils.sigmoid_expr(raw_expr, to_reuse=True)
    return ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), positive_proba, ast.BinNumOpType.SUB),
        positive_proba,
    ])
def _assemble_multi_class_output(self, trees):
    """Assemble a softmax probability vector for multi-class output.

    Multi-class output is calculated based on discussion in
    https://github.com/dmlc/xgboost/issues/1746#issuecomment-295962863
    """
    per_class_trees = _split_trees_by_classes(trees, self._output_size)
    raw_exprs = [
        self._assemble_single_output(class_trees, self._base_score)
        for class_trees in per_class_trees
    ]
    return ast.VectorVal(utils.softmax_exprs(raw_exprs))
"sigmoid": self._sigmoid_kernel,
"poly": self._poly_kernel,
"linear": self._linear_kernel
}
kernel_type = model.kernel
if kernel_type not in supported_kernels:
raise ValueError("Unsupported kernel type {}".format(kernel_type))
self._kernel_fun = supported_kernels[kernel_type]
n_features = len(model.support_vectors_[0])
gamma = model.gamma
if gamma == "auto" or gamma == "auto_deprecated":
gamma = 1.0 / n_features
self._gamma_expr = ast.NumVal(gamma)
self._neg_gamma_expr = utils.sub(ast.NumVal(0), ast.NumVal(gamma),
to_reuse=True)
self._output_size = 1
if type(model).__name__ in ("SVC", "NuSVC"):
n_classes = len(model.n_support_)
if n_classes > 2:
self._output_size = n_classes
def _final_transform(self, ast_to_transform):
    """Apply the model's final output transform.

    When ``self.average_output`` is set, the summed output is divided
    by the number of iterations (multiplication by 1/n_iter);
    otherwise the parent class transform is used unchanged.
    """
    if not self.average_output:
        return super()._final_transform(ast_to_transform)
    return utils.apply_bin_op(
        ast_to_transform,
        ast.NumVal(1 / self.n_iter),
        ast.BinNumOpType.MUL)
def _linear_kernel_with_gama_and_coef(self, support_vector):
    """Linear kernel scaled by gamma with ``coef0`` added on top.

    NOTE(review): "gama" in the method name looks like a typo for
    "gamma"; kept as-is for backward compatibility with callers.
    """
    scaled = utils.mul(
        self._gamma_expr, self._linear_kernel(support_vector))
    return utils.add(scaled, ast.NumVal(self.model.coef0))