Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_StackingEstimator_4():
    """Assert that the StackingEstimator worked as expected in scikit-learn pipeline in regression.

    The stacked random forest and the Lasso meta-regressor are fitted once
    through a scikit-learn pipeline and once step by step; both routes must
    report the same training score.  The mean 3-fold cross-validated R^2 of
    the pipeline is then compared with a previously recorded reference value.
    """
    stack_reg = StackingEstimator(estimator=RandomForestRegressor(random_state=42))
    meta_reg = Lasso(random_state=42)
    sklearn_pipeline = make_pipeline(stack_reg, meta_reg)
    # fit in pipeline
    sklearn_pipeline.fit(training_features_r, training_target_r)
    # fit step by step
    stack_reg.fit(training_features_r, training_target_r)
    X_reg_transformed = stack_reg.transform(training_features_r)
    meta_reg.fit(X_reg_transformed, training_target_r)
    # scoring
    score = meta_reg.score(X_reg_transformed, training_target_r)
    pipeline_score = sklearn_pipeline.score(training_features_r, training_target_r)
    assert np.allclose(score, pipeline_score)
    # test cv score
    cv_score = np.mean(cross_val_score(sklearn_pipeline, training_features_r, training_target_r, cv=3, scoring='r2'))
    known_cv_score = 0.7989564328211737
    # Without this comparison the cross-validation result was computed but
    # never verified, so a regression in the CV path would go unnoticed.
    assert np.allclose(known_cv_score, cv_score)
def test_StackingEstimator_1():
    """Check the synthetic columns StackingEstimator produces for a classifier.

    Column 0 of the transformed matrix must match the predictions of an
    identically configured stand-alone random forest, and the columns that
    follow it (one per distinct target value) must match that forest's
    class probabilities.
    """
    plain_forest = RandomForestClassifier(random_state=42)
    stacked_forest = StackingEstimator(estimator=RandomForestClassifier(random_state=42))

    # Train both estimators on the same data.
    plain_forest.fit(training_features, training_target)
    stacked_forest.fit(training_features, training_target)

    # Get the transformed X and pick out the synthetic columns.
    transformed = stacked_forest.transform(training_features)
    label_column = transformed[:, 0]
    n_classes = len(np.unique(training_target))
    proba_columns = transformed[:, 1:1 + n_classes]

    assert np.allclose(plain_forest.predict(training_features), label_column)
    assert np.allclose(plain_forest.predict_proba(training_features), proba_columns)
def test_StackingEstimator_2():
    """Check the synthetic column StackingEstimator produces for a regressor.

    Column 0 of the transformed matrix must match the predictions of an
    identically configured stand-alone random forest regressor.
    """
    plain_forest = RandomForestRegressor(random_state=42)
    stacked_forest = StackingEstimator(estimator=RandomForestRegressor(random_state=42))

    # Train both estimators on the same regression data.
    plain_forest.fit(training_features_r, training_target_r)
    stacked_forest.fit(training_features_r, training_target_r)

    # Get the transformed X; its first column is the synthetic feature.
    transformed = stacked_forest.transform(training_features_r)
    synthetic_column = transformed[:, 0]

    assert np.allclose(plain_forest.predict(training_features_r), synthetic_column)
def ptop_2030(X, Y, Test, uid, online=0):
    """Fit the exported stacking pipeline on a hold-out split and report errors.

    The inputs are converted to arrays, 20% of the rows are held out
    (``random_state=1``), the pipeline is fitted on the remaining rows, and
    ``error`` is called on the predictions for the fitted part and for the
    held-out part.

    Args:
        X: training features; must expose ``.values`` (e.g. a pandas DataFrame).
        Y: training targets; must expose ``.values``.
        Test: features of the set to predict; only converted to an array in
            the code visible here.
        uid: not used in the code visible here.
        online: not used in the code visible here.
    """
    # ``as_matrix()`` was deprecated in pandas 0.23 and removed in pandas 1.0.
    # ``.values`` is the documented replacement and is available in every
    # pandas version.
    train_X = X.values
    train_Y = Y.values
    # NOTE(review): unused below; the function may be truncated in this
    # file, so the conversion is kept rather than removed.
    test_X = Test.values
    X_train, X_test, y_train, y_test = train_test_split(train_X, train_Y, test_size=0.2, random_state=1)
    # Score on the training set was:-3.207903288331976
    exported_pipeline = make_pipeline(
        SelectFwe(score_func=f_regression, alpha=0.038),
        StackingEstimator(estimator=LassoLarsCV(normalize=False)),
        StackingEstimator(
            estimator=GradientBoostingRegressor(alpha=0.9, learning_rate=1.0, loss="quantile", max_depth=4,
                                                max_features=0.8, min_samples_leaf=7, min_samples_split=17,
                                                n_estimators=100, subsample=0.1)),
        ExtraTreesRegressor(bootstrap=True, max_features=0.8500000000000001, min_samples_leaf=12, min_samples_split=10,
                            n_estimators=100)
    )
    exported_pipeline.fit(X_train, y_train)
    print("train:--------------")
    predict = exported_pipeline.predict(X_train)
    error(predict, y_train)
    print("test:---------------")
    predict = exported_pipeline.predict(X_test)
    error(predict, y_test)
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import StackingEstimator
# NOTE: the data file must label the class column 'target'.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)

# Split features and target into training and testing parts (unseeded).
(training_features, testing_features,
 training_target, testing_target) = train_test_split(
    features, tpot_data['target'], random_state=None)

# Average CV score on the training set was:0.853347788745
exported_pipeline = make_pipeline(
    StackingEstimator(
        estimator=LogisticRegression(C=10.0, dual=False, penalty="l2"),
    ),
    DecisionTreeClassifier(
        criterion="gini",
        max_depth=7,
        min_samples_leaf=5,
        min_samples_split=7,
    ),
)

# Train on the training part, then predict the testing part.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
def ptot_result(X, Y, Test, uid, online=0):
    """Fit the exported Ridge/GBR/random-forest pipeline and report errors.

    The inputs are converted to arrays, 20% of the rows are held out
    (``random_state=1``), the pipeline is fitted on the remaining rows, and
    ``error`` is called on the predictions for the fitted part and for the
    held-out part.

    Args:
        X: training features; must expose ``.values`` (e.g. a pandas DataFrame).
        Y: training targets; must expose ``.values``.
        Test: features of the set to predict; only converted to an array in
            the code visible here.
        uid: not used in the code visible here.
        online: not used in the code visible here.
    """
    # ``as_matrix()`` was deprecated in pandas 0.23 and removed in pandas 1.0.
    # ``.values`` is the documented replacement and is available in every
    # pandas version.
    train_X = X.values
    train_Y = Y.values
    # NOTE(review): unused below; the function may be truncated in this
    # file, so the conversion is kept rather than removed.
    test_X = Test.values
    X_train, X_test, y_train, y_test = train_test_split(train_X, train_Y, test_size=0.2, random_state=1)
    exported_pipeline = make_pipeline(
        StackingEstimator(estimator=RidgeCV()),
        StackingEstimator(estimator=GradientBoostingRegressor(alpha=0.99, learning_rate=0.01, loss="huber", max_depth=6,
                                                              max_features=0.45, min_samples_leaf=12,
                                                              min_samples_split=18, n_estimators=100,
                                                              subsample=0.7500000000000001)),
        RandomForestRegressor(bootstrap=False, max_features=0.05, min_samples_leaf=11, min_samples_split=8,
                              n_estimators=100)
    )
    exported_pipeline.fit(X_train, y_train)
    print("train:--------------")
    predict = exported_pipeline.predict(X_train)
    error(predict, y_train)
    print("test:---------------")
    predict = exported_pipeline.predict(X_test)
    error(predict, y_test)
def ptop_2040(X, Y, Test, uid, online=0):
    """Fit the exported LassoLars/scaler/GBR pipeline and report errors.

    The inputs are converted to arrays, 20% of the rows are held out
    (``random_state=1``), the pipeline is fitted on the remaining rows, and
    ``error`` is called on the predictions for the fitted part and for the
    held-out part.

    Args:
        X: training features; must expose ``.values`` (e.g. a pandas DataFrame).
        Y: training targets; must expose ``.values``.
        Test: features of the set to predict; only converted to an array in
            the code visible here.
        uid: not used in the code visible here.
        online: compared with 1 at the end of the function; what that branch
            does is not visible in this file.
    """
    # ``as_matrix()`` was deprecated in pandas 0.23 and removed in pandas 1.0.
    # ``.values`` is the documented replacement and is available in every
    # pandas version.
    train_X = X.values
    train_Y = Y.values
    # NOTE(review): unused in the visible code; presumably consumed by the
    # online branch below, so the conversion is kept.
    test_X = Test.values
    X_train, X_test, y_train, y_test = train_test_split(train_X, train_Y, test_size=0.2, random_state=1)
    exported_pipeline = make_pipeline(
        StackingEstimator(estimator=LassoLarsCV(normalize=False)),
        RobustScaler(),
        GradientBoostingRegressor(alpha=0.8, learning_rate=0.1, loss="ls", max_depth=5, max_features=0.55,
                                  min_samples_leaf=12, min_samples_split=14, n_estimators=100, subsample=0.5)
    )
    exported_pipeline.fit(X_train, y_train)
    print("train:--------------")
    predict = exported_pipeline.predict(X_train)
    error(predict, y_train)
    print("test:---------------")
    predict = exported_pipeline.predict(X_test)
    error(predict, y_test)
    # online
    if online == 1:
        # NOTE(review): the body of this branch is missing from this file;
        # ``pass`` only keeps the function syntactically valid until the
        # original logic is restored.
        pass
from tpot.builtins import StackingEstimator
# NOTE: the data file must label the class column 'target'.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values

# Split the raw arrays into training and testing parts with a fixed seed.
(training_features, testing_features,
 training_target, testing_target) = train_test_split(
    features, tpot_data['target'].values, random_state=42)

# Median imputation: the imputer is fitted on the training part only and
# then applied to both parts.
imputer = Imputer(strategy="median").fit(training_features)
training_features, testing_features = (
    imputer.transform(training_features),
    imputer.transform(testing_features),
)

# Score on the training set was:-8.56166141363137
exported_pipeline = make_pipeline(
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    GradientBoostingRegressor(
        alpha=0.95,
        learning_rate=0.1,
        loss="lad",
        max_depth=7,
        max_features=0.7500000000000001,
        min_samples_leaf=3,
        min_samples_split=18,
        n_estimators=100,
        subsample=0.6000000000000001,
    ),
)

# Train on the training part, then predict the testing part.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)