How to use the mleap.bundle.serialize.MLeapDeserializer class in mleap

To help you get started, we've selected a few mleap examples based on popular ways it is used in public projects.
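
The snippets below all come from mleap's scikit-learn bindings (python/mleap/sklearn/preprocessing/data.py), where each transformer mixes in MLeapDeserializer and exposes a deserialize_from_bundle(node_path, node_name) method that restores a transformer's attributes from a serialized bundle node. As a minimal sketch of the calling pattern (the bundle path and node name below are hypothetical):

from mleap.sklearn.preprocessing.data import MathBinary

math_binary_tf = MathBinary(input_features=['f1', 'f2'],
                            output_features='f1_plus_f2',
                            transform_type='add')

# Hypothetical location of a node directory previously written by serialize_to_bundle
math_binary_tf = math_binary_tf.deserialize_from_bundle('/tmp/model', 'math_binary.node')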

github combust / mleap / python / mleap / sklearn / preprocessing / data.py (view on GitHub)

        outputs = [{
                  "name": self.output_features,
                  "port": "output"
                }]

        self.serialize(self, path, model_name, attributes, inputs, outputs)

    def deserialize_from_bundle(self, node_path, node_name):
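        # maps MLeap bundle attribute names to attribute names on this transformer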
        attributes_map = {
            'operation': 'transform_type'
        }
        full_node_path = os.path.join(node_path, node_name)
        transformer = self.deserialize_single_input_output(self, full_node_path, attributes_map)
        return transformer

class StringMap(BaseEstimator, TransformerMixin, MLeapSerializer, MLeapDeserializer):

    def __init__(self, input_features=None, output_features=None, labels=None):
        self.op = 'string_map'
        self.name = "{}_{}".format(self.op, uuid.uuid4())
        self.input_features = input_features
        self.output_features = output_features
        self.serializable = True
        self.labels = labels
        if labels is not None:
            self.label_keys = self.labels.keys()
            self.label_values = self.labels.values()

    def fit(self, X, y=None, **fit_params):
        if self.labels is None:
            self.labels = dict(zip(self.label_keys, self.label_values))
        return self

github combust / mleap / python / mleap / sklearn / preprocessing / data.py (view on GitHub)

        self.serialize(self, path, model_name, attributes, inputs, outputs)

    def deserialize_from_bundle(self, node_path, node_name):

        attributes_map = {
            'labels': 'classes_'
        }

        full_node_path = os.path.join(node_path, node_name)
        transformer = self.deserialize_single_input_output(self, full_node_path, attributes_map)

        return transformer


class MinMaxScalerSerializer(MLeapSerializer, MLeapDeserializer):
    """
    Scales features by the range of values using calculated min and max from training data.

    >>> data = pd.DataFrame([[1], [5], [6], [1]], columns=['col_a'])
    >>> minmax_scaler_tf = MinMaxScaler()
    >>> minmax_scaler_tf.mlinit(input_features='col_a', output_features='scaled_cont_features')

    >>> minmax_scaler_tf.fit_transform(data)
    array([[ 0.],
           [ 0.8],
           [ 1.],
           [ 0.]])
    """
    def __init__(self):
        super(MinMaxScalerSerializer, self).__init__()

github combust / mleap / python / mleap / sklearn / preprocessing / data.py (view on GitHub)

        # define node inputs and outputs
        inputs = [{
                  "name": transformer.input_features,
                  "port": "input"
                }]

        outputs = [{
                  "name": transformer.output_features,
                  "port": "output"
                }]

        self.serialize(transformer, path, model_name, attributes, inputs, outputs)


class StandardScalerSerializer(MLeapSerializer, MLeapDeserializer):
    """
    Standardizes features by removing the mean and scaling to unit variance using mean and standard deviation from
    training data.

    >>> data = pd.DataFrame([[1], [5], [6], [1]], columns=['col_a'])
    >>> standard_scaler_tf = StandardScaler()
    >>> standard_scaler_tf.mlinit(input_features='col_a', output_features='scaled_cont_features')
    >>> standard_scaler_tf.fit_transform(data)
    array([[-0.98787834],
           [ 0.76834982],
           [ 1.20740686],
           [-0.98787834]])
    """
    def __init__(self):
        super(StandardScalerSerializer, self).__init__()

    def serialize_to_bundle(self, transformer, path, model_name):

github combust / mleap / python / mleap / sklearn / preprocessing / data.py (view on GitHub)

        # compile tuples of model attributes to serialize
        attributes = list()
        attributes.append(("input_shapes", self.input_shapes))

        # define node inputs and outputs
        inputs = [{'name': x, 'port': 'input{}'.format(self.input_features.index(x))} for x in self.input_features]

        outputs = [{
                  "name": self.output_vector,
                  "port": "output"
                }]

        self.serialize(self, path, model_name, attributes, inputs, outputs)


class LabelEncoder(BaseEstimator, TransformerMixin, MLeapSerializer, MLeapDeserializer):
    """
    Copied from sklearn, but enables passing X and Y features, which allows this transformer
    to be used in Pipelines.

    Converts categorical values of a single column into categorical indices. This transformer should be followed by an
    NDArrayToDataFrame transformer to maintain the data structure required by scikit-learn pipelines.

    NOTE: you can only label encode/string index one feature at a time.

    >>> data = pd.DataFrame([['a', 0], ['b', 1], ['b', 3], ['c', 1]], columns=['col_a', 'col_b'])
    >>> # Label Encoder for x1 Label
    >>> label_encoder_tf = LabelEncoder(input_features=['col_a'], output_features='col_a_label_le')
    >>> # Convert output of Label Encoder to Data Frame instead of 1d-array
    >>> n_dim_array_to_df_tf = NDArrayToDataFrame('col_a_label_le')
    >>> n_dim_array_to_df_tf.fit_transform(label_encoder_tf.fit_transform(data['col_a']))

github combust / mleap / python / mleap / sklearn / preprocessing / data.py (view on GitHub)

        outputs = [{
                  "name": self.output_features,
                  "port": "output"
                }]

        self.serialize(self, path, model_name, attributes, inputs, outputs)

    def deserialize_from_bundle(self, node_path, node_name):
        attributes_map = {
            'operation': 'transform_type'
        }
        full_node_path = os.path.join(node_path, node_name)
        transformer = self.deserialize_single_input_output(self, full_node_path, attributes_map)
        return transformer


class MathBinary(BaseEstimator, TransformerMixin, MLeapSerializer, MLeapDeserializer):
    """
    Performs basic math operations on two features (columns of a DataFrame). Supported operations include:
        - add: Add x + y
        - sub: Subtract x - y
        - mul: Multiply x * y
        - div: Divide x / y
        - rem: Remainder x % y
        - logn: LogN log(x) / log(y)
        - pow: Power x^y
    These transforms work on 2-dimensional arrays/vectors, where the first column is x and the second column is y.
    Inputs need to be floats.
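
    Illustrative usage (not from the original docstring; the frame and column
    names are made up):

    >>> df = pd.DataFrame([[2.0, 3.0], [4.0, 5.0]], columns=['f1', 'f2'])
    >>> math_binary_tf = MathBinary(input_features=['f1', 'f2'], output_features='f1_plus_f2', transform_type='add')
    >>> math_binary_tf.fit_transform(df)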
    """
    def __init__(self, input_features=None, output_features=None, transform_type=None):
        self.valid_transforms = ['add', 'sub', 'mul', 'div', 'rem', 'logn', 'pow']
        self.op = 'math_binary'
        self.name = "{}_{}".format(self.op, uuid.uuid4())

github combust / mleap / python / mleap / sklearn / preprocessing / data.py (view on GitHub)

        full_node_path = os.path.join(node_path, node_name)
        transformer = self.deserialize_single_input_output(transformer, full_node_path, attributes_map)

        # Set Sparse = False
        transformer.sparse = False

        # Set Feature Indices
        n_values = np.hstack([[0], [transformer.n_values_]])
        indices = np.cumsum(n_values)
        transformer.feature_indices_ = indices
        transformer.active_features_ = range(0, transformer.n_values_)

        return transformer


class BinarizerSerializer(MLeapSerializer, MLeapDeserializer):
    def __init__(self):
        super(BinarizerSerializer, self).__init__()

    def serialize_to_bundle(self, transformer, path, model_name):

        # compile tuples of model attributes to serialize
        attributes = list()
        attributes.append(('threshold', float(transformer.threshold)))
        attributes.append(("input_shapes", transformer.input_shapes))

        # define node inputs and outputs
        inputs = [{
                  "name": transformer.input_features,
                  "port": "input"
github combust / mleap / python / mleap / sklearn / preprocessing / data.py View on Github external
outputs = [{
                  "name": transformer.output_features,
                  "port": "output"
                }]

        self.serialize(transformer, path, model_name, attributes, inputs, outputs)

    def deserialize_from_bundle(self, transformer, node_path, node_name):

        full_node_path = os.path.join(node_path, node_name)
        transformer = self.deserialize_single_input_output(transformer, full_node_path)

        return transformer


class PolynomialExpansionSerializer(MLeapSerializer, MLeapDeserializer):
    def __init__(self):
        super(PolynomialExpansionSerializer, self).__init__()

    def serialize_to_bundle(self, transformer, path, model_name):

        # compile tuples of model attributes to serialize
        attributes = list()
        attributes.append(('combinations', str(transformer.get_feature_names()).replace("'", "").replace(", ", ",")))

        # define node inputs and outputs
        inputs = [{
                  "name": transformer.input_features,
                  "port": "input"
                }]

        outputs = [{
                  "name": transformer.output_features,
                  "port": "output"
                }]

        self.serialize(transformer, path, model_name, attributes, inputs, outputs)

github combust / mleap / python / mleap / sklearn / preprocessing / data.py (view on GitHub)

        # Set Additional Attributes
        if 'mean_' in transformer.__dict__:
            transformer.with_mean = True
        else:
            transformer.with_mean = False

        if 'scale_' in transformer.__dict__:
            transformer.with_std = True
            transformer.var = np.square(transformer.scale_)
        else:
            transformer.with_std = False

        return transformer


class OneHotEncoderSerializer(MLeapSerializer, MLeapDeserializer):
    """
    A one-hot encoder maps a single column of categorical indices to a
    column of binary vectors, which can be reassembled into a DataFrame using a ToDense transformer.
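
    Illustrative usage of the serializer (hypothetical path and model name; assumes a
    fitted, mlinit-initialized OneHotEncoder one_hot_encoder_tf):

    >>> serializer = OneHotEncoderSerializer()
    >>> serializer.serialize_to_bundle(one_hot_encoder_tf, '/tmp/model', 'my_one_hot_encoder')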
    """
    def __init__(self):
        super(OneHotEncoderSerializer, self).__init__()

    def serialize_to_bundle(self, transformer, path, model_name):

        # compile tuples of model attributes to serialize
        attributes = list()
        attributes.append(('size', transformer.n_values_.tolist()[0]))
        # the default sklearn OneHotEncoder doesn't support 'drop_last'
        # see mleap.sklearn.extensions.data for OneHotEncoder that does support 'drop_last'
        attributes.append(('drop_last', False))

github combust / mleap / python / mleap / sklearn / preprocessing / data.py (view on GitHub)

        if os.path.exists("{}/{}".format(path, model_name)):
            shutil.rmtree("{}/{}".format(path, model_name))

        model_dir = "{}/{}".format(path, model_name)
        os.mkdir(model_dir)

        # Write bundle file
        with open("{}/{}".format(model_dir, 'model.json'), 'w') as outfile:
            json.dump(self.get_mleap_model(), outfile, indent=3)

        # Write node file
        with open("{}/{}".format(model_dir, 'node.json'), 'w') as outfile:
            json.dump(self.get_mleap_node(), outfile, indent=3)


class MathUnary(BaseEstimator, TransformerMixin, MLeapSerializer, MLeapDeserializer):
    """
    Performs basic math operations on a single feature (column of a DataFrame). Supported operations include:
        - log
        - exp
        - sqrt
        - sin
        - cos
        - tan
    Note: we currently only support 1d-arrays.
    Inputs need to be floats.
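
    Illustrative usage (not from the original docstring; the column name is made up):

    >>> series = pd.Series([1.0, 2.0, 3.0], name='f1')
    >>> math_unary_tf = MathUnary(input_features=['f1'], output_features='log_f1', transform_type='log')
    >>> math_unary_tf.fit_transform(series)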
    """
    def __init__(self, input_features=None, output_features=None, transform_type=None):
        self.valid_transforms = ['log', 'exp', 'sqrt', 'sin', 'cos', 'tan']
        self.op = 'math_unary'
        self.name = "{}_{}".format(self.op, uuid.uuid4())
        self.input_features = input_features