        - unused_fields: list of fields in the input data that
                         are not being used in the model
"""
# Checks and cleans input_data leaving the fields used in the model
unused_fields = []
new_data = self.filter_input_data( \
input_data,
add_unused_fields=full)
if full:
input_data, unused_fields = new_data
else:
input_data = new_data
# Strips affixes for numeric values and casts to the final field type
cast(input_data, self.fields)
# When operating_point is used, we need the probabilities
        # of all possible classes to decide, so we use
# the `predict_probability` method
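        # For reference, an operating_point is a dict along these lines
        # (illustrative values only, not taken from this snippet):
        #     {"kind": "probability", "threshold": 0.9,
        #      "positive_class": "Iris-setosa"}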
if operating_point:
return self.predict_operating( \
input_data, operating_point=operating_point)
if operating_kind:
return self.predict_operating_kind( \
input_data, operating_kind=operating_kind)
# In case that missing_numerics is False, checks that all numeric
# fields are present in input data.
if not self.missing_numerics:
check_no_missing_numerics(input_data, self.model_fields,
self.weight_field)
- unused_fields: list of fields in the input data that
are not being used in the model
"""
# Checks and cleans input_data leaving the fields used in the model
new_data = self.filter_input_data( \
input_data,
add_unused_fields=full)
unused_fields = None
if full:
input_data, unused_fields = new_data
else:
input_data = new_data
# Strips affixes for numeric values and casts to the final field type
cast(input_data, self.fields)
if median and method is None:
# predictions with median are only available with old combiners
method = PLURALITY_CODE
if method is None and operating_point is None and \
operating_kind is None and not median:
# operating_point has precedence over operating_kind. If no
# combiner is set, default operating kind is "probability"
operating_kind = "probability"
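            # (an operating kind names the measure used to pick the winning
            # class, e.g. "probability" or "confidence"; this default only
            # applies when no other prediction strategy was requested)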
if operating_point:
if self.regression:
raise ValueError("The operating_point argument can only be"
" used in classifications.")
prediction = self.predict_operating( \
from bigml.util import cast, NUMERIC
from bigml.predicate import TM_FULL_TERM, TM_ALL
from bigml.cluster import parse_terms, parse_items
from bigml.logistic import LogisticRegression, balance_input
from bigml.modelfields import get_unique_terms
CONSTANTS = """
EXPANSION_ATTRIBUTES = {"categorical": "categories", "text": "tag_cloud",
"items": "items"}
TM_FULL_TERM = %s
TM_ALL = %s
NUMERIC = %s
""" % (repr(TM_FULL_TERM), repr(TM_ALL), repr(NUMERIC))
FUNCTIONS = [cast, parse_terms, parse_items, get_unique_terms,
balance_input]
CLASS_DEFINITION = """
class BasicLR(object):
def __init__(self, lr_dict):
self.__dict__ = lr_dict
"""
CLASS_METHODS = ["predict", "filter_input_data", "category_probability",
"get_unique_terms", "get_coefficients", "normalize"]
def summary_example(field):
# Checks and cleans input_data leaving the fields used in the model
unused_fields = []
new_data = self.filter_input_data( \
input_data,
add_unused_fields=full)
if full:
input_data, unused_fields = new_data
else:
input_data = new_data
if not self.missing_numerics:
check_no_missing_numerics(input_data, self.model_fields)
# Strips affixes for numeric values and casts to the final field type
cast(input_data, self.fields)
full_prediction = self._predict( \
input_data, missing_strategy=missing_strategy,
operating_point=operating_point,
unused_fields=unused_fields)
if full:
return dict((key, value) for key, value in \
                full_prediction.items() if value is not None)
return full_prediction['prediction']
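        # With `full=True` the caller gets the dict of non-None attributes,
        # for instance (illustrative values only):
        #     {"prediction": "Iris-setosa", "probability": 0.93,
        #      "unused_fields": ["petal width"]}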
- prediction: the prediction value
- unused_fields: list of fields in the input data that
are not being used in the model
"""
# Checks and cleans input_data leaving the fields used in the model
unused_fields = []
new_data = self.filter_input_data( \
input_data,
add_unused_fields=full)
if full:
new_data, unused_fields = new_data
# Strips affixes for numeric values and casts to the final field type
cast(new_data, self.fields)
        # If the training data had no missing values, the input data
        # shouldn't have them either
check_no_training_missings(new_data, self.model_fields,
self.weight_field,
self.objective_id)
# Computes text and categorical field expansion
unique_terms = self.get_unique_terms(new_data)
# Creates an input vector with the values for all expanded fields.
input_array = self.expand_input(new_data, unique_terms)
compact_input_array = self.expand_input(new_data, unique_terms, True)
prediction = dot([flatten(self.coefficients)], [input_array])[0][0]
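        # E.g. (hypothetical values) with flattened coefficients
        # [0.5, 2.0, 1.0] and input_array [3, 1, 1], the dot product yields
        # 0.5 * 3 + 2.0 * 1 + 1.0 * 1 = 4.5 as the raw prediction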
result = {
- probability: prediction's probability
- unused_fields: list of fields in the input data that
are not being used in the model
"""
# Checks and cleans input_data leaving the fields used in the model
unused_fields = []
new_data = self.filter_input_data( \
input_data, add_unused_fields=full)
if full:
input_data, unused_fields = new_data
else:
input_data = new_data
# Strips affixes for numeric values and casts to the final field type
cast(input_data, self.fields)
# When operating_point is used, we need the probabilities
        # of all possible classes to decide, so we use
# the `predict_probability` method
if operating_point:
if self.regression:
raise ValueError("The operating_point argument can only be"
" used in classifications.")
return self.predict_operating( \
input_data, operating_point=operating_point)
if operating_kind:
if self.regression:
                raise ValueError("The operating_kind argument can only be"
                                 " used in classifications.")
return self.predict_operating_kind( \
input_data, operating_kind=operating_kind)
in the predicted node
- unused_fields: list of fields in the input data that
are not being used in the model
"""
# Checks and cleans input_data leaving the fields used in the model
unused_fields = []
new_data = self.filter_input_data( \
input_data,
add_unused_fields=full)
if full:
input_data, unused_fields = new_data
else:
input_data = new_data
# Strips affixes for numeric values and casts to the final field type
cast(input_data, self.fields)
full_prediction = self._predict( \
input_data, missing_strategy=missing_strategy,
operating_point=operating_point, operating_kind=operating_kind,
unused_fields=unused_fields)
if full:
return dict((key, value) for key, value in \
                full_prediction.items() if value is not None)
return full_prediction['prediction']
To produce an anomaly score, we evaluate each tree in the iforest
for its depth result (see the depth method in the AnomalyTree
object for details). We find the average of these depths
to produce an `observed_mean_depth`. We calculate an
`expected_mean_depth` using the `sample_size` and `mean_depth`
parameters which come as part of the forest message.
We combine those values as seen below, which should result in a
value between 0 and 1.
"""
# Checks and cleans input_data leaving the fields used in the model
input_data = self.filter_input_data(input_data)
# Strips affixes for numeric values and casts to the final field type
cast(input_data, self.fields)
depth_sum = 0
if self.iforest is None:
raise Exception("We could not find the iforest information to "
"compute the anomaly score. Please, rebuild your "
"Anomaly object from a complete anomaly detector "
"resource.")
for tree in self.iforest:
depth_sum += tree.depth(input_data)[0]
observed_mean_depth = float(depth_sum) / len(self.iforest)
return math.pow(2, - observed_mean_depth / self.expected_mean_depth)
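        # Rough worked example (hypothetical values): if the trees isolate
        # the input at an observed_mean_depth of 4 while expected_mean_depth
        # is 8, the score is 2 ** -(4 / 8), roughly 0.71 (fairly anomalous);
        # an observed_mean_depth of 12 would give 2 ** -1.5, roughly 0.35,
        # closer to the "normal" end of the [0, 1] range.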