import json
import os
import shutil
import uuid

import numpy as np
import pandas as pd  # used in the doctest examples below
from sklearn.base import BaseEstimator, TransformerMixin

# adjust this import path if your mleap package layout differs
from mleap.bundle.serialize import MLeapSerializer, MLeapDeserializer
    def serialize_to_bundle(self, path, model_name):
        # compile tuples of model attributes to serialize
        # ('operation' is an assumption: the deserializer below maps it back onto transform_type)
        attributes = list()
        attributes.append(('operation', self.transform_type))
        # define node inputs and outputs
        inputs = [{'name': self.input_features, 'port': 'input'}]
        outputs = [{
            "name": self.output_features,
            "port": "output"
        }]
        self.serialize(self, path, model_name, attributes, inputs, outputs)
def deserialize_from_bundle(self, node_path, node_name):
attributes_map = {
'operation': 'transform_type'
}
full_node_path = os.path.join(node_path, node_name)
transformer = self.deserialize_single_input_output(self, full_node_path, attributes_map)
return transformer
class StringMap(BaseEstimator, TransformerMixin, MLeapSerializer, MLeapDeserializer):
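    """
    Maps the string values of a single column to new values using a user-supplied
    dictionary. Minimal usage sketch (the feature names and mapping below are
    hypothetical, not part of the original module):
    >>> data = pd.DataFrame([['a'], ['b'], ['a']], columns=['col_a'])
    >>> string_map_tf = StringMap(input_features='col_a', output_features='col_a_mapped',
    ...                           labels={'a': 1.0, 'b': 0.0})
    >>> string_map_tf.fit(data)
    """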
def __init__(self, input_features=None, output_features=None, labels=None):
self.op = 'string_map'
self.name = "{}_{}".format(self.op, uuid.uuid4())
self.input_features = input_features
self.output_features = output_features
self.serializable = True
self.labels = labels
        if labels is not None:
            self.label_keys = list(self.labels.keys())
            self.label_values = list(self.labels.values())
def fit(self, X, y=None, **fit_params):
        if self.labels is None:
            # labels was not passed to the constructor (e.g. after deserialization),
            # so rebuild the mapping from the stored key/value lists
            self.labels = dict(zip(self.label_keys, self.label_values))
return self
    def serialize_to_bundle(self, path, model_name):
        # compile tuples of model attributes to serialize
        # (attribute names are an assumption based on the pattern used elsewhere in this module)
        attributes = list()
        attributes.append(('labels', self.label_keys))
        attributes.append(('values', self.label_values))
        # define node inputs and outputs
        inputs = [{'name': self.input_features, 'port': 'input'}]
        outputs = [{'name': self.output_features, 'port': 'output'}]
        self.serialize(self, path, model_name, attributes, inputs, outputs)
def deserialize_from_bundle(self, node_path, node_name):
        attributes_map = {
            'labels': 'label_keys',
            'values': 'label_values'
        }
full_node_path = os.path.join(node_path, node_name)
transformer = self.deserialize_single_input_output(self, full_node_path, attributes_map)
return transformer
class MinMaxScalerSerializer(MLeapSerializer, MLeapDeserializer):
"""
    Scales features to a given range (default [0, 1]) using the per-feature minimum and maximum computed from the
    training data.
>>> data = pd.DataFrame([[1], [5], [6], [1]], columns=['col_a'])
>>> minmax_scaler_tf = MinMaxScaler()
>>> minmax_scaler_tf.mlinit(input_features='col_a', output_features='scaled_cont_features')
>>> minmax_scaler_tf.fit_transform(data)
    array([[ 0. ],
           [ 0.8],
           [ 1. ],
           [ 0. ]])
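    The serializer below writes the fitted scaler to an MLeap bundle node; a minimal
    sketch (the path and model name are hypothetical):
    >>> MinMaxScalerSerializer().serialize_to_bundle(minmax_scaler_tf, '/tmp/models', 'minmax_scaler')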
"""
def __init__(self):
super(MinMaxScalerSerializer, self).__init__()
    def serialize_to_bundle(self, transformer, path, model_name):
        # compile tuples of model attributes to serialize
        # ('min'/'max' attribute names are an assumption; the values come from the fitted sklearn scaler)
        attributes = list()
        attributes.append(('min', transformer.data_min_.tolist()))
        attributes.append(('max', transformer.data_max_.tolist()))
        # define node inputs and outputs
inputs = [{
"name": transformer.input_features,
"port": "input"
}]
outputs = [{
"name": transformer.output_features,
"port": "output"
}]
self.serialize(transformer, path, model_name, attributes, inputs, outputs)
class StandardScalerSerializer(MLeapSerializer, MLeapDeserializer):
"""
Standardizes features by removing the mean and scaling to unit variance using mean and standard deviation from
training data.
>>> data = pd.DataFrame([[1], [5], [6], [1]], columns=['col_a'])
>>> standard_scaler_tf = StandardScaler()
>>> standard_scaler_tf.mlinit(input_features='col_a', output_features='scaled_cont_features')
>>> standard_scaler_tf.fit_transform(data)
    array([[-0.98787834],
           [ 0.76834982],
           [ 1.20740686],
           [-0.98787834]])
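    A minimal serialization sketch (the path and model name are hypothetical):
    >>> StandardScalerSerializer().serialize_to_bundle(standard_scaler_tf, '/tmp/models', 'standard_scaler')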
"""
def __init__(self):
super(StandardScalerSerializer, self).__init__()
def serialize_to_bundle(self, transformer, path, model_name):
        # compile tuples of model attributes to serialize
        # ('mean'/'std' attribute names are an assumption; the values come from the fitted sklearn scaler)
        attributes = list()
        attributes.append(('mean', transformer.mean_.tolist()))
        attributes.append(('std', transformer.scale_.tolist()))
        # define node inputs and outputs
        inputs = [{
            "name": transformer.input_features,
            "port": "input"
        }]
        outputs = [{
            "name": transformer.output_features,
            "port": "output"
        }]
        self.serialize(transformer, path, model_name, attributes, inputs, outputs)
    def deserialize_from_bundle(self, transformer, node_path, node_name):
        full_node_path = os.path.join(node_path, node_name)
        transformer = self.deserialize_single_input_output(transformer, full_node_path)
        # Set Additional Attributes
        if 'mean_' in transformer.__dict__:
            transformer.with_mean = True
        else:
            transformer.with_mean = False
        if 'scale_' in transformer.__dict__:
            transformer.with_std = True
            transformer.var_ = np.square(transformer.scale_)
        else:
            transformer.with_std = False
        return transformer
class LabelEncoder(BaseEstimator, TransformerMixin, MLeapSerializer, MLeapDeserializer):
"""
Copied from sklearn, but enables passing X and Y features, which allows this transformer
to be used in Pipelines.
    Converts the categorical values of a single column into categorical indices. This transformer should be followed
    by an NDArrayToDataFrame transformer to maintain the data structure required by scikit-learn pipelines.
    NOTE: you can only label-encode/string-index one feature at a time.
>>> data = pd.DataFrame([['a', 0], ['b', 1], ['b', 3], ['c', 1]], columns=['col_a', 'col_b'])
>>> # Label Encoder for x1 Label
    >>> label_encoder_tf = LabelEncoder(input_features=['col_a'], output_features='col_a_label_le')
>>> # Convert output of Label Encoder to Data Frame instead of 1d-array
>>> n_dim_array_to_df_tf = NDArrayToDataFrame('col_a_label_le')
>>> n_dim_array_to_df_tf.fit_transform(label_encoder_tf.fit_transform(data['col_a']))
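    >>> # Illustrative sketch: the same two transformers can be chained in a scikit-learn Pipeline
    >>> from sklearn.pipeline import Pipeline
    >>> le_pipeline = Pipeline([('label_encoder', label_encoder_tf), ('to_df', n_dim_array_to_df_tf)])
    >>> le_pipeline.fit_transform(data['col_a'])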
"name": self.output_features,
"port": "output"
}]
self.serialize(self, path, model_name, attributes, inputs, outputs)
def deserialize_from_bundle(self, node_path, node_name):
        attributes_map = {
            'labels': 'classes_'
        }
full_node_path = os.path.join(node_path, node_name)
transformer = self.deserialize_single_input_output(self, full_node_path, attributes_map)
return transformer
class MathBinary(BaseEstimator, TransformerMixin, MLeapSerializer, MLeapDeserializer):
"""
Performs basic math operations on two features (columns of a DataFrame). Supported operations include:
- add: Add x + y
- sub: Subtract x - y
- mul: Multiply x * y
- div: Divide x / y
- rem: Remainder x % y
- logn: LogN log(x) / log(y)
- pow: Power x^y
    These transforms work on 2-dimensional arrays/vectors, where the first column is x and the second column is y.
    Inputs need to be floats.
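    Minimal usage sketch (the feature names below are hypothetical):
    >>> data = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], columns=['col_a', 'col_b'])
    >>> math_binary_tf = MathBinary(input_features=['col_a', 'col_b'],
    ...                             output_features='col_a_plus_b', transform_type='add')
    >>> math_binary_tf.fit(data)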
"""
def __init__(self, input_features=None, output_features=None, transform_type=None):
self.valid_transforms = ['add', 'sub', 'mul', 'div', 'rem', 'logn', 'pow']
self.op = 'math_binary'
self.name = "{}_{}".format(self.op, uuid.uuid4())
        self.input_features = input_features
        self.output_features = output_features
        self.transform_type = transform_type
        self.serializable = True

    def deserialize_from_bundle(self, node_path, node_name):
        attributes_map = {
            'operation': 'transform_type'
        }
        full_node_path = os.path.join(node_path, node_name)
        transformer = self.deserialize_single_input_output(self, full_node_path, attributes_map)
        return transformer
class BinarizerSerializer(MLeapSerializer, MLeapDeserializer):
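    """
    Serializes a fitted sklearn Binarizer to an MLeap bundle node. Minimal usage sketch
    (the feature names, threshold, and path below are hypothetical):
    >>> binarizer_tf = Binarizer(threshold=0.5)
    >>> binarizer_tf.mlinit(input_features='col_a', output_features='col_a_binary')
    >>> BinarizerSerializer().serialize_to_bundle(binarizer_tf, '/tmp/models', 'binarizer')
    """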
def __init__(self):
super(BinarizerSerializer, self).__init__()
def serialize_to_bundle(self, transformer, path, model_name):
# compile tuples of model attributes to serialize
attributes = list()
attributes.append(('threshold', float(transformer.threshold)))
attributes.append(("input_shapes", transformer.input_shapes))
# define node inputs and outputs
        inputs = [{
            "name": transformer.input_features,
            "port": "input"
        }]
        outputs = [{
"name": transformer.output_features,
"port": "output"
}]
self.serialize(transformer, path, model_name, attributes, inputs, outputs)
def deserialize_from_bundle(self, transformer, node_path, node_name):
full_node_path = os.path.join(node_path, node_name)
transformer = self.deserialize_single_input_output(transformer, full_node_path)
return transformer
class PolynomialExpansionSerializer(MLeapSerializer, MLeapDeserializer):
def __init__(self):
super(PolynomialExpansionSerializer, self).__init__()
def serialize_to_bundle(self, transformer, path, model_name):
# compile tuples of model attributes to serialize
attributes = list()
attributes.append(('combinations', str(transformer.get_feature_names()).replace("'", "").replace(", ", ",")))
# define node inputs and outputs
inputs = [{
"name": transformer.input_features,
"port": "input"
}]
        outputs = [{
            "name": transformer.output_features,
            "port": "output"
        }]
        self.serialize(transformer, path, model_name, attributes, inputs, outputs)
class OneHotEncoderSerializer(MLeapSerializer, MLeapDeserializer):
"""
A one-hot encoder maps a single column of categorical indices to a
    column of binary vectors, which can be reassembled back into a DataFrame using a ToDense transformer.
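    Minimal usage sketch (the fitted encoder, path, and model name are hypothetical):
    >>> OneHotEncoderSerializer().serialize_to_bundle(one_hot_encoder_tf, '/tmp/models', 'one_hot_encoder')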
"""
def __init__(self):
super(OneHotEncoderSerializer, self).__init__()
def serialize_to_bundle(self, transformer, path, model_name):
# compile tuples of model attributes to serialize
attributes = list()
attributes.append(('size', transformer.n_values_.tolist()[0]))
# the default sklearn OneHotEncoder doesn't support 'drop_last'
# see mleap.sklearn.extensions.data for OneHotEncoder that does support 'drop_last'
attributes.append(('drop_last', False))
        # if a bundle already exists at this path, remove it so a clean one is written
        if os.path.exists("{}/{}".format(path, model_name)):
            shutil.rmtree("{}/{}".format(path, model_name))
model_dir = "{}/{}".format(path, model_name)
os.mkdir(model_dir)
# Write bundle file
with open("{}/{}".format(model_dir, 'model.json'), 'w') as outfile:
json.dump(self.get_mleap_model(), outfile, indent=3)
# Write node file
with open("{}/{}".format(model_dir, 'node.json'), 'w') as outfile:
json.dump(self.get_mleap_node(), outfile, indent=3)
    def deserialize_from_bundle(self, transformer, node_path, node_name):
        full_node_path = os.path.join(node_path, node_name)
        transformer = self.deserialize_single_input_output(transformer, full_node_path)
        # Set Sparse = False
        transformer.sparse = False
        # Set Feature Indices
        n_values = np.hstack([[0], [transformer.n_values_]])
        indices = np.cumsum(n_values)
        transformer.feature_indices_ = indices
        transformer.active_features_ = range(0, transformer.n_values_)
        return transformer
class MathUnary(BaseEstimator, TransformerMixin, MLeapSerializer, MLeapDeserializer):
"""
Performs basic math operations on a single feature (column of a DataFrame). Supported operations include:
- log
- exp
- sqrt
- sin
- cos
- tan
    Note: currently only 1d-arrays are supported.
Inputs need to be floats.
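    Minimal usage sketch (the feature names below are hypothetical):
    >>> data = pd.DataFrame([[1.0], [4.0], [9.0]], columns=['col_a'])
    >>> math_unary_tf = MathUnary(input_features='col_a', output_features='col_a_sqrt',
    ...                           transform_type='sqrt')
    >>> math_unary_tf.fit(data)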
"""
def __init__(self, input_features=None, output_features=None, transform_type=None):
self.valid_transforms = ['log', 'exp', 'sqrt', 'sin', 'cos', 'tan']
self.op = 'math_unary'
self.name = "{}_{}".format(self.op, uuid.uuid4())
self.input_features = input_features