How to use the tpot.builtins.StackingEstimator function in TPOT

To help you get started, we’ve selected a few TPOT examples based on popular ways it is used in public projects.
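Before the examples, a quick orientation: StackingEstimator wraps any scikit-learn estimator and exposes it as a transformer whose transform() prepends the wrapped model's predictions (plus class probabilities, for classifiers) to the original feature matrix, so a downstream model can learn from those synthetic features. The minimal sketch below is illustrative only; the synthetic dataset and estimator choices are ours, not taken from the projects shown on this page.

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.pipeline import make_pipeline
from tpot.builtins import StackingEstimator

# toy regression data, purely for illustration
X, y = make_regression(n_samples=200, n_features=10, random_state=0)

# the wrapped RandomForestRegressor is fit first; its predictions become an extra feature for Lasso
pipeline = make_pipeline(
    StackingEstimator(estimator=RandomForestRegressor(n_estimators=50, random_state=0)),
    Lasso(),
)
pipeline.fit(X, y)
print(pipeline.score(X, y))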


github EpistasisLab/tpot/tests/stacking_estimator_tests.py
def test_StackingEstimator_4():
    """Assert that the StackingEstimator worked as expected in scikit-learn pipeline in regression."""
    stack_reg = StackingEstimator(estimator=RandomForestRegressor(random_state=42))
    meta_reg = Lasso(random_state=42)
    sklearn_pipeline = make_pipeline(stack_reg, meta_reg)
    # fit in pipeline
    sklearn_pipeline.fit(training_features_r, training_target_r)
    # fit step by step
    stack_reg.fit(training_features_r, training_target_r)
    X_reg_transformed = stack_reg.transform(training_features_r)
    meta_reg.fit(X_reg_transformed, training_target_r)
    # scoring
    score = meta_reg.score(X_reg_transformed, training_target_r)
    pipeline_score = sklearn_pipeline.score(training_features_r, training_target_r)
    assert np.allclose(score, pipeline_score)

    # test cv score
    cv_score = np.mean(cross_val_score(sklearn_pipeline, training_features_r, training_target_r, cv=3, scoring='r2'))
    known_cv_score = 0.7989564328211737
    assert np.allclose(known_cv_score, cv_score)

github EpistasisLab/tpot/tests/stacking_estimator_tests.py
def test_StackingEstimator_1():
    """Assert that the StackingEstimator returns transformed X with synthetic features in classification."""
    clf = RandomForestClassifier(random_state=42)
    stack_clf = StackingEstimator(estimator=RandomForestClassifier(random_state=42))
    # fit
    clf.fit(training_features, training_target)
    stack_clf.fit(training_features, training_target)
    # get transformed X
    X_clf_transformed = stack_clf.transform(training_features)

    assert np.allclose(clf.predict(training_features), X_clf_transformed[:, 0])
    assert np.allclose(clf.predict_proba(training_features), X_clf_transformed[:, 1:1 + len(np.unique(training_target))])
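Because the wrapped model here is a classifier with predict_proba, the transformed matrix checked above contains one prediction column, one probability column per class, and then the original features. A quick width check along the same lines, reusing the test's variables:

n_classes = len(np.unique(training_target))
# prediction column + one probability column per class + the original features
assert X_clf_transformed.shape[1] == 1 + n_classes + training_features.shape[1]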

github EpistasisLab/tpot/tests/stacking_estimator_tests.py
def test_StackingEstimator_2():
    """Assert that the StackingEstimator returns transformed X with a synthetic feature in regression."""
    reg = RandomForestRegressor(random_state=42)
    stack_reg = StackingEstimator(estimator=RandomForestRegressor(random_state=42))
    # fit
    reg.fit(training_features_r, training_target_r)
    stack_reg.fit(training_features_r, training_target_r)
    # get transformed X
    X_reg_transformed = stack_reg.transform(training_features_r)

    assert np.allclose(reg.predict(training_features_r), X_reg_transformed[:, 0])
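For a regressor there are no class probabilities, so only the prediction column is added; the transformed matrix should be exactly one column wider than the input:

# regression adds a single synthetic feature: the wrapped model's prediction
assert X_reg_transformed.shape[1] == training_features_r.shape[1] + 1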

github zhangtianle/jd/tl/src/Main.py
def ptop_2030(X, Y, Test, uid, online=0):
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the drop-in replacement
    train_X = X.values
    train_Y = Y.values

    test_X = Test.values

    X_train, X_test, y_train, y_test = train_test_split(train_X, train_Y, test_size=0.2, random_state=1)

    # Score on the training set was: -3.207903288331976
    exported_pipeline = make_pipeline(
        SelectFwe(score_func=f_regression, alpha=0.038),
        StackingEstimator(estimator=LassoLarsCV(normalize=False)),
        StackingEstimator(
            estimator=GradientBoostingRegressor(alpha=0.9, learning_rate=1.0, loss="quantile", max_depth=4,
                                                max_features=0.8, min_samples_leaf=7, min_samples_split=17,
                                                n_estimators=100, subsample=0.1)),
        ExtraTreesRegressor(bootstrap=True, max_features=0.8500000000000001, min_samples_leaf=12, min_samples_split=10,
                            n_estimators=100)
    )

    exported_pipeline.fit(X_train, y_train)
    print("train:--------------")
    predict = exported_pipeline.predict(X_train)
    error(predict, y_train)

    print("test:---------------")
    predict = exported_pipeline.predict(X_test)
    error(predict, y_test)

github EpistasisLab/tpot/tutorials/MAGIC Gamma Telescope/tpot_MAGIC_Gamma_Telescope_pipeline.py
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import StackingEstimator

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'], random_state=None)

# Average CV score on the training set was: 0.853347788745
exported_pipeline = make_pipeline(
    StackingEstimator(estimator=LogisticRegression(C=10.0, dual=False, penalty="l2")),
    DecisionTreeClassifier(criterion="gini", max_depth=7, min_samples_leaf=5, min_samples_split=7)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
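The exported pipeline stops at predict(); if you also want a held-out score, something along these lines works (accuracy_score comes from scikit-learn, and the variable names follow the snippet above):

from sklearn.metrics import accuracy_score

# compare the held-out predictions against the true labels
print(accuracy_score(testing_target, results))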

github zhangtianle/jd/tl/src/Main.py
def ptot_result(X, Y, Test, uid, online=0):
    train_X = X.values
    train_Y = Y.values

    test_X = Test.values

    X_train, X_test, y_train, y_test = train_test_split(train_X, train_Y, test_size=0.2, random_state=1)

    exported_pipeline = make_pipeline(
        StackingEstimator(estimator=RidgeCV()),
        StackingEstimator(estimator=GradientBoostingRegressor(alpha=0.99, learning_rate=0.01, loss="huber", max_depth=6,
                                                              max_features=0.45, min_samples_leaf=12,
                                                              min_samples_split=18, n_estimators=100,
                                                              subsample=0.7500000000000001)),
        RandomForestRegressor(bootstrap=False, max_features=0.05, min_samples_leaf=11, min_samples_split=8,
                              n_estimators=100)
    )

    exported_pipeline.fit(X_train, y_train)
    print("train:--------------")
    predict = exported_pipeline.predict(X_train)
    error(predict, y_train)

    print("test:---------------")
    predict = exported_pipeline.predict(X_test)
    error(predict, y_test)

github zhangtianle/jd/tl/src/Main.py
def ptop_2040(X, Y, Test, uid, online=0):
    train_X = X.values
    train_Y = Y.values

    test_X = Test.values

    X_train, X_test, y_train, y_test = train_test_split(train_X, train_Y, test_size=0.2, random_state=1)

    exported_pipeline = make_pipeline(
        StackingEstimator(estimator=LassoLarsCV(normalize=False)),
        RobustScaler(),
        GradientBoostingRegressor(alpha=0.8, learning_rate=0.1, loss="ls", max_depth=5, max_features=0.55,
                                  min_samples_leaf=12, min_samples_split=14, n_estimators=100, subsample=0.5)
    )

    exported_pipeline.fit(X_train, y_train)
    print("train:--------------")
    predict = exported_pipeline.predict(X_train)
    error(predict, y_train)

    print("test:---------------")
    predict = exported_pipeline.predict(X_test)
    error(predict, y_test)

    # online
    if online == 1:

github WillKoehrsen/machine-learning-project-walkthrough/auto_ml/tpot_exported_pipeline.py
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LassoLarsCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
# Imputer was removed in scikit-learn 0.22; on newer versions use sklearn.impute.SimpleImputer
from sklearn.preprocessing import Imputer
from tpot.builtins import StackingEstimator

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'].values, random_state=42)

imputer = Imputer(strategy="median")
imputer.fit(training_features)
training_features = imputer.transform(training_features)
testing_features = imputer.transform(testing_features)

# Score on the training set was: -8.56166141363137
exported_pipeline = make_pipeline(
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    GradientBoostingRegressor(alpha=0.95, learning_rate=0.1, loss="lad", max_depth=7, max_features=0.7500000000000001, min_samples_leaf=3, min_samples_split=18, n_estimators=100, subsample=0.6000000000000001)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
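As in the previous exported pipeline, the predictions can be scored directly; mean_absolute_error is a reasonable choice given the "lad" (absolute-error) loss used by the final regressor, though the exact metric the TPOT run optimized is an assumption on our part:

from sklearn.metrics import mean_absolute_error

# if the TPOT run was scored with neg_mean_absolute_error, this MAE corresponds to the score comment above
print(mean_absolute_error(testing_target, results))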