# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_export_to_sklearn_pipeline3(self):
    """Export a trained three-branch Lale pipeline to a scikit-learn pipeline.

    The Lale pipeline combines three parallel feature branches with ``&``,
    concatenates them, selects two features and classifies with logistic
    regression.  After fitting on ``self.X_train`` / ``self.y_train`` the
    exported pipeline is checked step by step: the ``'featureunion'``,
    ``'selectkbest'`` and ``'logisticregression'`` entries of ``named_steps``
    must be the corresponding scikit-learn classes.  Finally both pipelines
    are handed to ``self.assert_equal_predictions`` (a helper defined outside
    this method; presumably it compares their predictions).
    """
    from lale.lib.lale import ConcatFeatures
    from lale.lib.sklearn import PCA, LogisticRegression, Nystroem
    from sklearn.feature_selection import SelectKBest
    # Aliased so the scikit-learn class never rebinds the Lale operator name
    # used to build the pipeline below.
    from sklearn.linear_model import (
        LogisticRegression as SklearnLogisticRegression,
    )
    from sklearn.pipeline import FeatureUnion

    lale_pipeline = (
        (
            (PCA() >> SelectKBest(k=2))
            & (Nystroem(random_state=42) >> SelectKBest(k=3))
            & (SelectKBest(k=3))
        )
        >> ConcatFeatures()
        >> SelectKBest(k=2)
        >> LogisticRegression()
    )
    trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
    sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()

    steps = sklearn_pipeline.named_steps
    self.assertIsInstance(steps['featureunion'], FeatureUnion)
    self.assertIsInstance(steps['selectkbest'], SelectKBest)
    self.assertIsInstance(
        steps['logisticregression'], SklearnLogisticRegression
    )
    self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
def test_make_choice_with_instance(self):
    """Build operator choices from instances and from classes.

    Fitting a bare choice of operator *instances* on the iris data is
    expected to raise ``AttributeError``.  The three planned pipelines are
    only constructed (never fit): one embeds the instance choice, one uses
    ``|`` on operator classes, and one uses ``make_choice`` explicitly.
    """
    from sklearn.datasets import load_iris
    from lale.operators import make_union, make_choice, make_pipeline

    dataset = load_iris()
    X = dataset.data
    y = dataset.target

    tfm = PCA() | Nystroem() | NoOp()
    with self.assertRaises(AttributeError):
        trained = tfm.fit(X, y)

    # Choice of instances embedded between two class-level choices.
    planned_pipeline1 = (
        (OneHotEncoder | NoOp)
        >> tfm
        >> (LogisticRegression | KNeighborsClassifier)
    )
    # Same shape, with every choice expressed through the ``|`` combinator.
    planned_pipeline2 = (
        (OneHotEncoder | NoOp)
        >> (PCA | Nystroem)
        >> (LogisticRegression | KNeighborsClassifier)
    )
    # Same shape again, spelled with the functional ``make_choice`` API.
    planned_pipeline3 = (
        make_choice(OneHotEncoder, NoOp)
        >> make_choice(PCA, Nystroem)
        >> make_choice(LogisticRegression, KNeighborsClassifier)
    )
def test_feature_preprocessor(self):
    """Smoke-test one feature preprocessor named by ``fproc_name``.

    ``fproc_name`` is a dotted ``module.ClassName`` string that is not
    assigned in this function (presumably supplied by an enclosing factory).
    The class is imported dynamically and instantiated with defaults, then
    the test validates its four schemas, fits and transforms it, and
    serialises it with ``to_json()``.
    """
    X_train = self.X_train
    y_train = self.y_train
    X_test = self.X_test
    y_test = self.y_test

    import importlib
    # Everything before the last dot is the module path, the rest the class.
    module_name, _, class_name = fproc_name.rpartition('.')
    class_ = getattr(importlib.import_module(module_name), class_name)
    fproc = class_()

    from lale.lib.sklearn.one_hot_encoder import OneHotEncoderImpl
    if fproc._impl_class() == OneHotEncoderImpl:
        #fproc = OneHotEncoder(handle_unknown = 'ignore')
        #remove the hack when this is fixed
        fproc = PCA()

    #test_schemas_are_schemas
    for schema_getter in (
        'input_schema_fit',
        'input_schema_transform',
        'output_schema_transform',
        'hyperparam_schema',
    ):
        lale.type_checking.validate_is_schema(getattr(fproc, schema_getter)())

    #test_init_fit_transform
    trained = fproc.fit(self.X_train, self.y_train)
    predictions = trained.transform(self.X_test)

    #test_predict_on_trainable
    trained = fproc.fit(X_train, y_train)
    fproc.transform(X_train)

    #test_to_json
    fproc.to_json()
def dont_test_with_gridsearchcv2_auto(self):
# Runs scikit-learn's GridSearchCV over a two-step PCA -> LogisticRegression
# Pipeline on the iris data, using parameter grids generated from the
# equivalent `pca >> lr` pipeline, and records the resulting accuracy.
# NOTE(review): the name does not start with `test`, so default unittest
# discovery presumably skips this method -- confirm that is intentional.
# NOTE(review): indentation was lost in this copy of the file, so the extent
# of the `with warnings.catch_warnings():` body cannot be read off the text.
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, make_scorer
# `LogisticRegression` and `PCA` are not imported in this method; they come
# from the enclosing module (presumably the Lale wrappers -- TODO confirm).
lr = LogisticRegression(random_state = 42)
pca = PCA(random_state = 42, svd_solver = 'arpack')
trainable = pca >> lr
from sklearn.pipeline import Pipeline
# Step names are taken from the operators' own `name()` values so that the
# generated parameter grids can address the steps of this Pipeline.
scikit_pipeline = Pipeline([(pca.name(), PCA(random_state = 42, svd_solver = 'arpack')), (lr.name(), LogisticRegression(random_state = 42))])
all_parameters = get_grid_search_parameter_grids(trainable, num_samples=1)
# otherwise the test takes too long
parameters = random.sample(all_parameters, 2)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
clf = GridSearchCV(scikit_pipeline, parameters, cv=2, scoring=make_scorer(accuracy_score))
iris = load_iris()
clf.fit(iris.data, iris.target)
# Accuracy is measured on the same data the search was fit on.
predicted = clf.predict(iris.data)
accuracy_with_lale_operators = accuracy_score(iris.target, predicted)
# NOTE(review): the two imports below are not used by any visible line and
# `accuracy_with_lale_operators` is never asserted on; the remainder of this
# method appears to be cut off -- check against the upstream source.
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA as SklearnPCA
def test_fit_smaller_trials(self):
    """Fit ``TopKVotingClassifier`` with fewer optimizer trials than ``k``.

    The ensemble is asked for ``k=20`` members while the optimizer is given
    ``max_evals=3``; after fitting on ``self.X_train`` / ``self.y_train`` the
    wrapped voting model must hold at most three estimators.
    """
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score
    from lale.lib.lale import TopKVotingClassifier
    from lale.lib.sklearn import Nystroem

    planned = (PCA() | Nystroem()) >> (LogisticRegression()|KNeighborsClassifier())
    voter = TopKVotingClassifier(
        estimator=planned,
        args_to_optimizer={'max_evals':3},
        k=20,
    )
    fitted = voter.fit(self.X_train, self.y_train)

    best = fitted._impl._best_estimator
    members = best._impl._wrapped_model.estimators
    self.assertLessEqual(len(members), 3)
def test_with_gridsearchcv_auto_wrapped_pipe1(self):
# Fits Lale's GridSearchCV wrapper around a `PCA() >> LogisticRegression()`
# pipeline on the iris data, with one sample and one grid, 2-fold CV and
# accuracy scoring, while warnings are suppressed.
# NOTE(review): indentation was lost in this copy of the file, so the extent
# of the `with` bodies below cannot be read off the text.
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, make_scorer
lr = LogisticRegression()
pca = PCA()
trainable = pca >> lr
with warnings.catch_warnings():
warnings.simplefilter("ignore")
from lale.lib.lale import GridSearchCV
clf = GridSearchCV(
estimator=trainable, lale_num_samples=1, lale_num_grids=1,
cv=2, scoring=make_scorer(accuracy_score))
iris = load_iris()
clf.fit(iris.data, iris.target)
# NOTE(review): from here on the code uses `class_`, `X_train`, `y_train` and
# `X_test`, none of which are assigned in the visible lines above. This looks
# like the tail of a different test spliced onto this one -- confirm against
# the upstream source before relying on it.
# Constructing `class_` without arguments is expected to raise ValueError.
with self.assertRaises(ValueError):
res = class_()
#test_schemas_are_schemas
lale.type_checking.validate_is_schema(class_.input_schema_fit())
lale.type_checking.validate_is_schema(class_.input_schema_predict())
lale.type_checking.validate_is_schema(class_.output_schema_predict())
lale.type_checking.validate_is_schema(class_.hyperparam_schema())
#test_init_fit_predict
from lale.operators import make_pipeline
# `class_` takes another operator through its `operator=` keyword; it is
# exercised both as the last step of a pipeline and as the outermost operator.
pipeline1 = PCA() >> class_(operator=make_pipeline(LogisticRegression()))
trained = pipeline1.fit(X_train, y_train)
predictions = trained.predict(X_test)
pipeline2 = class_(operator=make_pipeline(PCA(), LogisticRegression()))
trained = pipeline2.fit(X_train, y_train)
predictions = trained.predict(X_test)
#test_with_hyperopt
from lale.lib.lale import Hyperopt
# Each Hyperopt run is limited to a single evaluation (`max_evals = 1`).
optimizer = Hyperopt(estimator=PCA >> class_(operator=make_pipeline(LogisticRegression())), max_evals = 1, show_progressbar=False)
trained_optimizer = optimizer.fit(X_train, y_train)
predictions = trained_optimizer.predict(X_test)
pipeline3 = class_(operator= PCA() >> (Nystroem & NoOp) >> ConcatFeatures >> LogisticRegression())
optimizer = Hyperopt(estimator=pipeline3, max_evals = 1, show_progressbar=False)
trained_optimizer = optimizer.fit(X_train, y_train)
predictions = trained_optimizer.predict(X_test)
pipeline4 = (PCA >> class_(operator=make_pipeline(Nystroem())) & class_(operator=make_pipeline(Nystroem()))) >> ConcatFeatures >> LogisticRegression()
# NOTE(review): this optimizer is constructed but never fit in the visible
# lines; the rest of the test may be cut off.
optimizer = Hyperopt(estimator=pipeline4, max_evals = 1, scoring='roc_auc', show_progressbar=False)
def test_hyperparam_overriding_with_hyperopt(self):
    """Check that fixing a hyperparameter changes the hyperopt search space.

    The search space built from ``PCA(n_components = 3)`` must not compare
    equal to the one built from a default-constructed ``PCA()``.
    """
    pinned_space = hyperopt_search_space(PCA(n_components = 3))
    default_space = hyperopt_search_space(PCA())
    self.assertNotEqual(pinned_space, default_space)