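# --- Fragment: Association.__init__ (local association-rules wrapper) ---
# Reconstructed context: this body sits inside
#     def __init__(self, association, api=None):
# of a ModelFields subclass; the fragment begins mid-constructor.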
self.api = get_api_connection(api)
self.resource_id, association = get_resource_dict( \
association, "association", api=self.api)
if 'object' in association and isinstance(association['object'], dict):
association = association['object']
if 'associations' in association and \
isinstance(association['associations'], dict):
status = get_status(association)
if 'code' in status and status['code'] == FINISHED:
self.input_fields = association['input_fields']
associations = association['associations']
fields = associations['fields']
ModelFields.__init__( \
self, fields, \
missing_tokens=associations.get('missing_tokens'))
self.complement = associations.get('complement', False)
self.discretization = associations.get('discretization', {})
self.field_discretizations = associations.get(
'field_discretizations', {})
self.items = [Item(index, item, fields) for index, item in
enumerate(associations.get('items', []))]
self.max_k = associations.get('max_k', 100)
self.max_lhs = associations.get('max_lhs', 4)
self.min_confidence = associations.get('min_confidence', 0)
self.min_leverage = associations.get('min_leverage', -1)
self.min_support = associations.get('min_support', 0)
self.min_lift = associations.get('min_lift', 0)
self.search_strategy = associations.get('search_strategy', \
DEFAULT_SEARCH_STRATEGY)
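# Hedged usage sketch for the association wrapper above; the resource id is a
# placeholder, and get_rules() follows the BigML bindings' local API:
#
#     from bigml.association import Association
#     local_association = Association("association/5143a51a37203f2cf7000972")
#     local_association.get_rules()

# --- Fragment: get_terms_array helper (the signature below is reconstructed
# from the parameter names used in the body) ---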
""" Returns an array that represents the frequency of terms as ordered
in the reference `terms` parameter.
"""
input_terms = unique_terms.get(field_id, [])
terms_array = [0] * len(terms)
try:
for term, frequency in input_terms:
index = terms.index(term)
terms_array[index] = frequency
except ValueError:
pass
return terms_array
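# Minimal self-contained check of get_terms_array: input term counts are
# re-ordered to match the reference `terms` list, and reference terms that
# were never seen keep frequency 0 (field id and counts are made up):
example_terms = ["cheap", "fast", "reliable"]
example_unique_terms = {"000001": [("fast", 2), ("cheap", 1)]}
assert get_terms_array(example_terms, example_unique_terms, "000001") == [1, 2, 0]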
class LinearRegression(ModelFields):
""" A lightweight wrapper around a linear regression model.
Uses a BigML remote linear regression model to build a local version
that can be used to generate predictions locally.
"""
def __init__(self, linear_regression, api=None):
self.resource_id = None
self.input_fields = []
self.term_forms = {}
self.tag_clouds = {}
self.term_analysis = {}
self.items = {}
self.item_analysis = {}
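# Hedged usage sketch for LinearRegression (placeholder resource id; predict()
# is the local-prediction entry point in the BigML bindings):
#
#     from bigml.linear import LinearRegression
#     local_linear = LinearRegression(
#         "linearregression/5143a51a37203f2cf7000972")
#     local_linear.predict({"000000": 5.2})

# --- Fragment: LogisticRegression.__init__ (a different constructor: the
# logistic_regression_info variable below belongs to a logistic regression
# wrapper, not to the LinearRegression class above) ---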
self.c = logistic_regression_info.get('c')
self.eps = logistic_regression_info.get('eps')
self.lr_normalize = logistic_regression_info.get('normalize')
self.balance_fields = logistic_regression_info.get( \
'balance_fields')
self.regularization = logistic_regression_info.get( \
'regularization')
self.field_codings = logistic_regression_info.get( \
'field_codings', {})
# old models have no such attribute, so we set it to False in
# this case
self.missing_numerics = logistic_regression_info.get( \
'missing_numerics', False)
objective_id = extract_objective(objective_field)
missing_tokens = logistic_regression_info.get("missing_tokens")
ModelFields.__init__(
self, fields,
objective_id=objective_id, terms=True, categories=True,
numerics=True, missing_tokens=missing_tokens)
self.field_codings = logistic_regression_info.get( \
'field_codings', {})
self.format_field_codings()
        # Iterate over a copy of the keys: the loop deletes entries, and
        # deleting from a dict while iterating it raises a RuntimeError in
        # Python 3.
        for field_id in list(self.field_codings):
            if field_id not in self.fields and \
                    field_id in self.inverted_fields:
                self.field_codings.update( \
                    {self.inverted_fields[field_id]: \
                     self.field_codings[field_id]})
                del self.field_codings[field_id]
if old_coefficients:
self.map_coefficients()
        categories = self.fields[self.objective_id].get( \
            "categories", [])
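# --- Fragment: language-code-to-stemmer-name map from the topic model
# module (the CODE_TO_NAME identifier is reconstructed; the fragment lacked
# the dict's opening line) ---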
"en": u'english',
"fi": u'finnish',
"fr": u'french',
"de": u'german',
"hu": u'hungarian',
"it": u'italian',
"nn": u'norwegian',
"pt": u'portuguese',
"ro": u'romanian',
"ru": u'russian',
"es": u'spanish',
"sv": u'swedish',
"tr": u'turkish'
}
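# Hedged usage sketch: the map above resolves a two-letter language code to a
# stemmer language name, e.g. with the PyStemmer package (an assumption about
# the consuming code):
#
#     import Stemmer
#     stemmer = Stemmer.Stemmer(CODE_TO_NAME.get("en", "english"))
#     stemmer.stemWord("running")  # -> "run"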
class TopicModel(ModelFields):
""" A lightweight wrapper around a Topic Model.
Uses a BigML remote Topic Model to build a local version that can be used
to generate topic distributions for input documents locally.
"""
def __init__(self, topic_model, api=None):
self.resource_id = None
self.stemmer = None
self.seed = None
self.case_sensitive = False
self.bigrams = False
self.ntopics = None
self.temp = None
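# Hedged usage sketch for TopicModel (placeholder resource id; distribution()
# returns the per-topic probabilities for an input document):
#
#     from bigml.topicmodel import TopicModel
#     local_topic_model = TopicModel("topicmodel/5143a51a37203f2cf7000972")
#     local_topic_model.distribution({"text": "a sample document"})

# --- Fragment: module imports and constants (anomaly detector module) ---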
import logging
import math

import json
from bigml.api import FINISHED
from bigml.api import get_status, get_api_connection
from bigml.util import cast
from bigml.basemodel import get_resource_dict
from bigml.modelfields import ModelFields
from bigml.anomalytree import AnomalyTree
LOGGER = logging.getLogger('BigML')

# Euler-Mascheroni constant; used below to normalize isolation-forest depths
# via the expected path length of an unsuccessful binary-search-tree lookup.
DEPTH_FACTOR = 0.5772156649
class Anomaly(ModelFields):
""" A lightweight wrapper around an anomaly detector.
Uses a BigML remote anomaly detector model to build a local version that
can be used to generate anomaly scores locally.
"""
def __init__(self, anomaly, api=None):
self.resource_id = None
self.sample_size = None
self.input_fields = None
self.mean_depth = None
self.expected_mean_depth = None
self.iforest = None
self.top_anomalies = None
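# Hedged usage sketch for Anomaly (placeholder resource id; anomaly_score()
# is the local scoring entry point in the BigML bindings):
#
#     from bigml.anomaly import Anomaly
#     local_anomaly = Anomaly("anomaly/5143a51a37203f2cf7000972")
#     local_anomaly.anomaly_score({"src_bytes": 350})

# --- Fragment: LinearRegression.__init__ (continued from the class above) ---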
self.input_fields = [ \
field_id for field_id, _ in
sorted(fields.items(),
key=lambda x: x[1].get("column_number"))]
self.coeff_ids = self.input_fields[:]
self.coefficients = linear_regression_info.get( \
'coefficients', [])
self.bias = linear_regression_info.get('bias', True)
self.field_codings = linear_regression_info.get( \
'field_codings', {})
self.number_of_parameters = linear_regression_info.get( \
"number_of_parameters")
missing_tokens = linear_regression_info.get("missing_tokens")
objective_id = extract_objective(objective_field)
ModelFields.__init__(
self, fields,
objective_id=objective_id, terms=True, categories=True,
numerics=True, missing_tokens=missing_tokens)
self.field_codings = linear_regression_info.get( \
'field_codings', {})
self.format_field_codings()
        # As in the logistic fragment above, iterate over a copy of the keys
        # so entries can be deleted safely while looping.
        for field_id in list(self.field_codings):
            if field_id not in fields and \
                    field_id in self.inverted_fields:
                self.field_codings.update( \
                    {self.inverted_fields[field_id]: \
                     self.field_codings[field_id]})
                del self.field_codings[field_id]
stats = linear_regression_info["stats"]
if STATS and stats is not None and \
stats.get("xtx_inverse") is not None:
self.id_fields = []
self.api = get_api_connection(api)
self.resource_id, anomaly = get_resource_dict( \
anomaly, "anomaly", api=self.api)
if 'object' in anomaly and isinstance(anomaly['object'], dict):
anomaly = anomaly['object']
self.sample_size = anomaly.get('sample_size')
self.input_fields = anomaly.get('input_fields')
self.id_fields = anomaly.get('id_fields', [])
if 'model' in anomaly and isinstance(anomaly['model'], dict):
ModelFields.__init__( \
self, anomaly['model'].get('fields'), \
missing_tokens=anomaly['model'].get('missing_tokens'))
if ('top_anomalies' in anomaly['model'] and
isinstance(anomaly['model']['top_anomalies'], list)):
self.mean_depth = anomaly['model'].get('mean_depth')
status = get_status(anomaly)
if 'code' in status and status['code'] == FINISHED:
self.expected_mean_depth = None
                if self.mean_depth is None or self.sample_size is None:
                    raise Exception("The anomaly data is not complete. "
                                    "Scores will not be available.")
                else:
                    # Expected average path length of an unsuccessful search
                    # in a binary search tree built on n samples:
                    #   c(n) = 2 * (ln(n - 1) + gamma) - 2 * (n - 1) / n
                    default_depth = (
                        2 * (DEPTH_FACTOR + \
                        math.log(self.sample_size - 1) - \
                        (float(self.sample_size - 1) / self.sample_size)))
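# Worked check of the normalization above: for sample_size = 256,
#     2 * (0.5772156649 + math.log(255) - 255.0 / 256) ~= 10.2448
# which matches the usual isolation-forest figure c(256) ~= 10.24.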