Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_increase_num_rows(self):
    """A custom operator that grows the row count still composes with fit/transform."""
    from test.mock_custom_operators import IncreaseRows
    row_grower = IncreaseRows()
    trainable = row_grower >> LogisticRegression()
    dataset = sklearn.datasets.load_iris()
    features, labels = dataset.data, dataset.target
    trained = trainable.fit(features, labels)
    _ = trained.transform(features, labels)
def test_trained_pipeline_freeze_trainable(self):
    """freeze_trainable() returns a frozen copy and leaves the original unfrozen."""
    from lale.lib.sklearn import MinMaxScaler, LogisticRegression
    from lale.operators import TrainedPipeline
    trainable = MinMaxScaler() >> LogisticRegression()
    features = [[0.0], [1.0], [2.0]]
    labels = [0.0, 0.0, 1.0]
    fitted = trainable.fit(features, labels)
    self.assertIsInstance(fitted, TrainedPipeline)
    self.assertFalse(fitted.is_frozen_trainable())
    frozen_copy = fitted.freeze_trainable()
    # Freezing must not mutate the operator it was called on.
    self.assertFalse(fitted.is_frozen_trainable())
    self.assertTrue(frozen_copy.is_frozen_trainable())
    self.assertIsInstance(frozen_copy, TrainedPipeline)
def test_comparison_with_scikit(self):
    """Build the same PCA & Nystroem ensemble both as a lale pipeline and in plain scikit-learn."""
    import warnings
    warnings.filterwarnings("ignore")
    from lale.lib.sklearn import PCA
    import sklearn.datasets
    from lale.helpers import cross_val_score
    # Lale side: two transformers in parallel, concatenated, then LR.
    pca_op = PCA(n_components=3, random_state=42, svd_solver='arpack')
    nystroem_op = Nystroem(n_components=10, random_state=42)
    concat_op = ConcatFeatures()
    classifier = LogisticRegression(random_state=42, C=0.1)
    trainable = (pca_op & nystroem_op) >> concat_op >> classifier
    digits = sklearn.datasets.load_digits()
    X, y = sklearn.utils.shuffle(digits.data, digits.target, random_state=42)
    cv_results = cross_val_score(trainable, X, y)
    cv_results = ['{0:.1%}'.format(score) for score in cv_results]
    # scikit-learn side: the equivalent FeatureUnion + make_pipeline build.
    # NOTE: this import shadows lale.helpers.cross_val_score from here on,
    # but the lale helper was already used above.
    from sklearn.pipeline import make_pipeline, FeatureUnion
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.linear_model import LogisticRegression as SklearnLR
    from sklearn.model_selection import cross_val_score
    union = FeatureUnion([
        ("pca", SklearnPCA(n_components=3, random_state=42, svd_solver='arpack')),
        ("nys", SklearnNystroem(n_components=10, random_state=42))])
    lr = SklearnLR(random_state=42, C=0.1)
    pipeline = make_pipeline(union, lr)
def test_clone_with_scikit1(self):
    """sklearn.base.clone must deep-copy lale operators, both trainable and trained."""
    classifier = LogisticRegression()
    classifier.get_params()
    from sklearn.base import clone
    classifier_copy = clone(classifier)
    # The clone is a distinct operator with a distinct implementation object.
    self.assertNotEqual(classifier, classifier_copy)
    self.assertNotEqual(classifier._impl, classifier_copy._impl)
    dataset = sklearn.datasets.load_iris()
    trained = classifier.fit(dataset.data, dataset.target)
    predictions = trained.predict(dataset.data)
    trained_copy = clone(trained)
    self.assertNotEqual(trained._impl, trained_copy._impl)
    copied_predictions = trained_copy.predict(dataset.data)
    # Cloning a trained operator must not change its predictions.
    for index in range(len(dataset.target)):
        self.assertEqual(predictions[index], copied_predictions[index])
# Testing clone with pipelines having OperatorChoice
def test_multiple_estimators_predict_predict_proba(self):
    """A pipeline with estimators feeding mid-stream features supports both predict APIs."""
    pipeline = (
        StandardScaler()
        >> (LogisticRegression() & PCA()) >> ConcatFeatures()
        >> (NoOp() & LinearSVC()) >> ConcatFeatures()
        >> KNeighborsClassifier()
    )
    pipeline.fit(self.X_train, self.y_train)
    # Both prediction entry points must run without error after fit.
    _ = pipeline.predict_proba(self.X_test)
    _ = pipeline.predict(self.X_test)
def test_import_as_1(self):
    """Pretty-printing a pipeline preserves an aliased operator import."""
    from lale.lib.sklearn import LogisticRegression as LR
    pipeline = LR(solver='saga', C=0.9)
    # The printed program must reproduce the alias and the hyperparameters.
    expected = """from lale.lib.sklearn import LogisticRegression as LR
import lale
lale.wrap_imported_operators()
pipeline = LR(solver='saga', C=0.9)"""
    self._roundtrip(expected, lale.pretty_print.to_string(pipeline))
import numpy as np
from sklearn import svm, datasets
from sklearn.model_selection import cross_val_score
# Import ConfigSpace and different types of parameters
from smac.configspace import ConfigurationSpace
# Import SMAC-utilities
from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC as orig_SMAC
# NOTE(review): this fragment appears to be the body of a SMAC-optimization
# test; its enclosing def is not visible here, so it is documented in place.
# Planned pipeline with operator choices at each stage.
tfm = PCA() | Nystroem() | NoOp()
planned_pipeline1 = (OneHotEncoder(handle_unknown = 'ignore', sparse = False) | NoOp()) >> tfm >> (LogisticRegression() | KNeighborsClassifier())
# Derive the SMAC configuration space from the planned lale pipeline.
cs:ConfigurationSpace = get_smac_space(planned_pipeline1, lale_num_grids=1)
# Scenario object
scenario = Scenario({"run_obj": "quality", # we optimize quality (alternatively runtime)
"runcount-limit": 1, # maximum function evaluations
"cs": cs, # configuration space
"deterministic": "true"
})
# Optimize, using a SMAC-object
# Target-algorithm runner: evaluates a configuration on iris with 2-fold CV.
tae = iris_fmin_tae(planned_pipeline1, num_folds=2)
print("Optimizing! Depending on your machine, this might take a few minutes.")
# Fixed RNG seed keeps the SMAC run deterministic across test executions.
smac = orig_SMAC(scenario=scenario, rng=np.random.RandomState(42),
tae_runner=tae)
def test_clone_with_scikit2(self):
# Verify that sklearn.base.clone of a lale pipeline yields an operator that
# scores the same under cross-validation as the original.
lr = LogisticRegression()
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.datasets import load_iris
pca = PCA()
trainable = pca >> lr
from sklearn.base import clone
iris = load_iris()
X, y = iris.data, iris.target
trainable2 = clone(trainable)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
result = cross_val_score(trainable, X, y,
scoring=make_scorer(accuracy_score), cv=2)
result2 = cross_val_score(trainable2, X, y,
scoring=make_scorer(accuracy_score), cv=2)
# NOTE(review): the loop body is missing — this block appears truncated in
# the source; presumably it compared result[i] with result2[i]. TODO confirm
# against the original file.
for i in range(len(result)):
>>> from sklearn import datasets
>>> diabetes = datasets.load_diabetes()
>>> X = diabetes.data[:150]
>>> y = diabetes.target[:150]
>>> trained = clf.fit(X, y)
>>> predictions = trained.predict(X)
Other scoring metrics:
>>> clf = SMAC(estimator=lr, scoring=make_scorer(f1_score, average='macro'), cv=3, max_evals=2)
"""
self.max_evals = max_evals
if estimator is None:
self.estimator = LogisticRegression()
else:
self.estimator = estimator
self.search_space:ConfigurationSpace = get_smac_space(self.estimator, lale_num_grids=lale_num_grids)
self.scoring = scoring
self.best_score = best_score
self.handle_cv_failure = handle_cv_failure
self.cv = cv
self.max_opt_time = max_opt_time
# Scenario object
scenario_options = {"run_obj": "quality", # we optimize quality (alternatively runtime)
"runcount-limit": self.max_evals, # maximum function evaluations
"cs": self.search_space, # configuration space
"deterministic": "true",
"abort_on_first_run_crash": False,
}
def fit(self, X, y):
if self._hyperparams['estimator'] is None:
op = lale.lib.sklearn.LogisticRegression
else:
op = self._hyperparams['estimator']
observed_op = op
obs = self._hyperparams['observer']
if obs is not None:
observed_op = Observing(op=op, observer=obs)
hp_grid = self._hyperparams['hp_grid']
if hp_grid is None:
hp_grid = lale.search.lale_grid_search_cv.get_parameter_grids(
observed_op,
num_samples=self._hyperparams['lale_num_samples'],
num_grids=self._hyperparams['lale_num_grids'],
pgo=self._hyperparams['pgo'])
if not hp_grid and isinstance(op, lale.operators.IndividualOp):