import importlib
import inspect
import logging

from lale.operators import Operator, make_operator
from lale.sklearn_compat import clone_op

logger = logging.getLogger(__name__)

def wrap_imported_operators():
    # Inspect the caller's global symbol table and replace known estimator
    # classes with their Lale operator wrappers.
    calling_frame = inspect.stack()[1][0]
    symtab = calling_frame.f_globals
    for name, impl in symtab.items():
        if inspect.isclass(impl) and not issubclass(impl, Operator):
            module = impl.__module__.split('.')[0]
            klass = impl.__name__
            try:
                m = importlib.import_module('lale.lib.' + module)
                symtab[name] = clone_op(getattr(m, klass), name)
                logger.info(f'Lale:Wrapped known operator:{name}')
            except (ModuleNotFoundError, AttributeError):
                try:
                    m = importlib.import_module('lale.lib.autogen')
                    symtab[name] = clone_op(getattr(m, klass), name)
                    logger.info(f'Lale:Wrapped autogen operator:{name}')
                except (ModuleNotFoundError, AttributeError):
                    if hasattr(impl, 'fit') and (
                            hasattr(impl, 'predict') or hasattr(impl, 'transform')):
                        logger.info(f'Lale:Wrapped unknown operator:{name}')
                        symtab[name] = make_operator(impl=impl, name=name)
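# Hedged usage sketch (not part of the snippet above): after importing plain
# scikit-learn estimator classes, calling lale.wrap_imported_operators() replaces
# them in the caller's globals with Lale operators so they can be composed with
# Lale's combinators. The estimators chosen here are just examples.
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

import lale
lale.wrap_imported_operators()

# PCA and LogisticRegression now refer to Lale operators and support >> composition.
planned = PCA(n_components=2) >> LogisticRegression()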
def fit(self, X, y=None, **fit_params) -> 'TrainedPipeline':
    # Trains each step of the pipeline in topological order, feeding every step
    # the outputs of its predecessor steps.
    X = lale.datasets.data_schemas.add_schema(X)
    y = lale.datasets.data_schemas.add_schema(y)
    self.validate_schema(X, y)
    trained_steps: List[TrainedOperator] = []
    outputs: Dict[Operator, Any] = {}
    meta_outputs: Dict[Operator, Any] = {}
    edges: List[Tuple[TrainableOpType, TrainableOpType]] = self.edges()
    trained_map: Dict[TrainableOpType, TrainedOperator] = {}
    sink_nodes = self._find_sink_nodes()
    for operator in self._steps:
        preds = self._preds[operator]
        if len(preds) == 0:
            inputs = [X]
            meta_data_inputs: Dict[Operator, Any] = {}
        else:
            inputs = [outputs[pred] for pred in preds]
            # Create meta_data_inputs as a dictionary with metadata from all previous steps.
            # Note that if multiple previous steps generate the same key, only one of them is retained.
            meta_data_inputs = {key: meta_outputs[pred][key]
                                for pred in preds if meta_outputs[pred] is not None
                                for key in meta_outputs[pred]}
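# Hedged usage sketch for the fit method above: fitting a small trainable
# pipeline returns a TrainedPipeline whose steps were trained in topological
# order. The toy data below is an assumption for illustration only.
import numpy as np
from lale.lib.sklearn import PCA, LogisticRegression

X = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]])
y = np.array([0, 1, 1, 0])

trainable = PCA(n_components=1) >> LogisticRegression()
trained = trainable.fit(X, y)   # returns a TrainedPipeline
predictions = trained.predict(X)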
from lale.operators import PlannedOperator, Operator, BasePipeline, OperatorChoice, IndividualOp
from lale.operators import make_choice, make_pipeline, get_pipeline_of_applicable_type
from lale.lib.lale import NoOp
from typing import Optional
from lale.sklearn_compat import clone_op
import random
class NonTerminal(Operator):
    """Abstract operator for non-terminal grammar rules."""
    def __init__(self, name):
        self._name = name

    def _lale_clone(self, cloner):
        return NonTerminal(self.name())

    def _has_same_impl(self):
        pass

    def is_supervised(self):
        return False

    def validate_schema(self, X, y=None):
        raise NotImplementedError()  # TODO
def __init__(self, base_estimator=None, n_estimators=50, learning_rate=1.0, loss='linear', random_state=None):
    if isinstance(base_estimator, lale.operators.Operator):
        if isinstance(base_estimator, lale.operators.IndividualOp):
            base_estimator = base_estimator._impl_instance()._wrapped_model
        else:
            raise ValueError("If base_estimator is a Lale operator, it needs to be an individual operator.")
    self._hyperparams = {
        'base_estimator': base_estimator,
        'n_estimators': n_estimators,
        'learning_rate': learning_rate,
        'loss': loss,
        'random_state': random_state}
    # SKLModel is the wrapped scikit-learn estimator class (AdaBoostRegressor for this wrapper).
    self._wrapped_model = SKLModel(**self._hyperparams)
pass
@abstractmethod
def is_supervised(self) -> bool:
    """Checks if this operator needs labeled data for learning.

    Returns
    -------
    bool
        True if the fit method requires a y argument.
    """
    pass

Operator.__doc__ = cast(str, Operator.__doc__) + '\n' + _combinators_docstrings

class PlannedOperator(Operator):
    """Abstract class for Lale operators in the planned lifecycle state."""

    def auto_configure(self, X, y=None, optimizer=None, cv=None, scoring=None, **kwargs) -> 'TrainableOperator':
        """Perform combined algorithm selection and hyperparameter tuning on this planned operator.

        Parameters
        ----------
        X:
            Features that conform to the X property of input_schema_fit.
        y: optional
            Labels that conform to the y property of input_schema_fit.
            Default is None.
        optimizer:
            lale.lib.lale.HyperoptCV or lale.lib.lale.GridSearchCV
            default is None.
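# Hedged usage sketch for auto_configure: combined algorithm selection and
# hyperparameter tuning over a planned pipeline with an operator choice. The
# optimizer name follows the docstring above (lale.lib.lale.HyperoptCV); newer
# lale versions expose it as Hyperopt. max_evals is passed through **kwargs and
# is an assumption about the optimizer's interface.
from sklearn.datasets import load_iris
from lale.lib.lale import HyperoptCV
from lale.lib.sklearn import PCA, LogisticRegression, RandomForestClassifier

X, y = load_iris(return_X_y=True)
planned = PCA >> (LogisticRegression | RandomForestClassifier)
trained = planned.auto_configure(X, y, optimizer=HyperoptCV, cv=3, max_evals=10)
print(trained.predict(X[:5]))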
def __init__(self, base_estimator=None, n_estimators=50, learning_rate=1.0, algorithm='SAMME.R', random_state=None):
    if isinstance(base_estimator, lale.operators.Operator):
        if isinstance(base_estimator, lale.operators.IndividualOp):
            base_estimator = base_estimator._impl_instance()._wrapped_model
        else:
            raise ValueError("If base_estimator is a Lale operator, it needs to be an individual operator.")
    self._hyperparams = {
        'base_estimator': base_estimator,
        'n_estimators': n_estimators,
        'learning_rate': learning_rate,
        'algorithm': algorithm,
        'random_state': random_state}
    # SKLModel is the wrapped scikit-learn estimator class (AdaBoostClassifier for this wrapper).
    self._wrapped_model = SKLModel(**self._hyperparams)
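# Hedged usage sketch for the constructor above: base_estimator may be given as a
# Lale individual operator; the wrapper unwraps it to the underlying scikit-learn
# model before constructing the ensemble. Hyperparameter values are illustrative.
from lale.lib.sklearn import AdaBoostClassifier, DecisionTreeClassifier

clf = AdaBoostClassifier(
    base_estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=10)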
def _hps_to_json_rec(hps, cls2label: Dict[str, str], gensym: _GenSym, steps) -> Any:
    if isinstance(hps, lale.operators.Operator):
        step_uid, step_jsn = _op_to_json_rec(hps, cls2label, gensym)
        steps[step_uid] = step_jsn
        return {'$ref': f'../steps/{step_uid}'}
    elif isinstance(hps, dict):
        return {hp_name: _hps_to_json_rec(hp_val, cls2label, gensym, steps)
                for hp_name, hp_val in hps.items()}
    elif isinstance(hps, tuple):
        return tuple([_hps_to_json_rec(hp_val, cls2label, gensym, steps)
                      for hp_val in hps])
    elif isinstance(hps, list):
        return [_hps_to_json_rec(hp_val, cls2label, gensym, steps)
                for hp_val in hps]
    else:
        return hps
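# Hedged sketch of how _hps_to_json_rec is typically reached: serializing an
# operator whose hyperparameters contain a nested operator. The public entry
# point is assumed to be lale.json_operator.to_json; the name may vary by version.
import json
import lale.json_operator
from lale.lib.sklearn import AdaBoostClassifier, DecisionTreeClassifier

op = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
jsn = lale.json_operator.to_json(op)
# The nested DecisionTreeClassifier is hoisted into jsn['steps'] and the
# 'base_estimator' hyperparameter is replaced by a {'$ref': '../steps/...'} pointer.
print(json.dumps(jsn, indent=2))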
s_typed = [ret_arr]
# TODO: more!
assert not s_all
ret_all = []
ret_main = s_extra if s_extra else {}
if s_type_for_optimizer is not None:
    ret_main["laleType"] = s_type_for_optimizer
if s_enum:
    # we should simplify these as for s_not_enum
    ret_main['enum'] = list(s_enum)
    # now, we do some extra work to keep 'laleType':'operator' annotations
    if s_type_for_optimizer is None:
        from lale.operators import Operator
        if all(isinstance(x, Operator) for x in s_enum):
            # All the enumeration values are operators.
            # This means it is probably an operator schema,
            # which might have been missed if
            # this is being allOf'ed with an anyOfList.
            if s_any and all(hasAnyOperatorSchemas(s) for s in s_any):
                ret_main["laleType"] = 'operator'
    return ret_main
if ret_main:
    if s_typed:
        s_typed[0] = {**ret_main, **s_typed[0]}
    elif s_other:
        s_other[0] = {**ret_main, **s_other[0]}
    else:
        ret_all.append(ret_main)
if s_typed:
""" Method for cloning a lale operator, currently intended for internal use
"""
pass
    else:
        steps: Dict[str, JSON_TYPE] = {}
        jsn['hyperparams'] = _hps_to_json_rec(
            op.hyperparams(), cls2label, gensym, steps)
        if len(steps) > 0:
            jsn['steps'] = steps
    jsn['is_frozen_trainable'] = op.is_frozen_trainable()
    if isinstance(op, lale.operators.TrainedIndividualOp):
        if hasattr(op._impl, 'fit'):
            jsn['coefs'] = 'coefs_not_available'
        else:
            jsn['coefs'] = None
        jsn['is_frozen_trained'] = op.is_frozen_trained()
elif isinstance(op, lale.operators.BasePipeline):
    uid = gensym('pipeline')
    child2uid: Dict[lale.operators.Operator, str] = {}
    child2jsn: Dict[lale.operators.Operator, JSON_TYPE] = {}
    for idx, child in enumerate(op.steps()):
        child_uid, child_jsn = _op_to_json_rec(child, cls2label, gensym)
        child2uid[child] = child_uid
        child2jsn[child] = child_jsn
    jsn['edges'] = [[child2uid[x], child2uid[y]] for x, y in op.edges()]
    jsn['steps'] = {child2uid[z]: child2jsn[z] for z in op.steps()}
elif isinstance(op, lale.operators.OperatorChoice):
    jsn['operator'] = 'OperatorChoice'
    uid = gensym('choice')
    jsn['state'] = 'planned'
    jsn['steps'] = {}
    for step in op.steps():
        child_uid, child_jsn = _op_to_json_rec(step, cls2label, gensym)
        jsn['steps'][child_uid] = child_jsn
return uid, jsn
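# Illustrative only (shape inferred from the code above, not an actual lale dump):
# a planned pipeline PCA >> (LogisticRegression | KNeighborsClassifier) would be
# rendered roughly as nested dicts, with 'edges' listing [source_uid, target_uid]
# pairs and 'steps' holding a sub-document per step. The uids and the exact leaf
# fields below are assumptions.
example_jsn = {
    'edges': [['pca', 'choice_0']],
    'steps': {
        'pca': {'operator': 'PCA', 'state': 'planned'},
        'choice_0': {
            'operator': 'OperatorChoice',
            'state': 'planned',
            'steps': {
                'logistic_regression': {'operator': 'LogisticRegression', 'state': 'planned'},
                'k_neighbors_classifier': {'operator': 'KNeighborsClassifier', 'state': 'planned'},
            },
        },
    },
}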