Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_fit_6():
    """Check that fit() produces an optimized pipeline when fed pandas inputs."""
    settings = dict(
        random_state=42,
        population_size=1,
        offspring_size=2,
        generations=1,
        verbosity=0,
    )
    classifier = TPOTClassifier(**settings)
    classifier.fit(pd_features, pd_target)

    # Sanity check on the fixture itself: the features are a DataFrame.
    assert isinstance(pd_features, pd.DataFrame)
    # After fitting, the optimized pipeline is a DEAP individual and the
    # start timestamp has been populated.
    assert isinstance(classifier._optimized_pipeline, creator.Individual)
    assert classifier._start_datetime is not None
def test_predict_proba():
    """Assert that predict_proba returns an array of shape (num_testing_rows, num_labels).

    A fixed decision-tree pipeline is compiled from its string form, fitted
    on the training fixtures, and then used to predict class probabilities
    for the testing fixtures.
    """
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()

    pipeline_string = (
        'DecisionTreeClassifier('
        'input_matrix, '
        'DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8, '
        'DecisionTreeClassifier__min_samples_leaf=5, '
        'DecisionTreeClassifier__min_samples_split=5)'
    )
    tpot_obj._optimized_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    tpot_obj.fitted_pipeline_ = tpot_obj._toolbox.compile(expr=tpot_obj._optimized_pipeline)
    tpot_obj.fitted_pipeline_.fit(training_features, training_target)

    result = tpot_obj.predict_proba(testing_features)
    # NOTE(review): assumes labels are the integers 0..max and that every
    # label occurs in the training data -- confirm against the fixtures.
    num_labels = np.amax(testing_target) + 1

    # The original computed `result` and `num_labels` but never checked them,
    # so the test could not fail; pin the shape the docstring promises.
    assert result.shape == (testing_features.shape[0], num_labels)
def test_warm_start():
    """Assert that the TPOT warm_start flag stores the pop and pareto_front from the first run.

    The classifier is fitted twice on the same data with warm_start=True;
    the population captured after the first fit is compared with the one
    held after the second fit.
    """
    tpot_obj = TPOTClassifier(
        random_state=42,
        population_size=1,
        offspring_size=2,
        generations=1,
        verbosity=0,
        config_dict='TPOT light',
        warm_start=True,
    )
    tpot_obj.fit(pretest_X, pretest_y)

    assert tpot_obj._pop is not None
    assert tpot_obj._pareto_front is not None

    first_pop = tpot_obj._pop
    # Change the seed so a cold restart would be free to build a different
    # population on the second fit.
    tpot_obj.random_state = 21
    tpot_obj.fit(pretest_X, pretest_y)

    # Previously `first_pop` was captured but never used, leaving the second
    # fit unchecked.
    # NOTE(review): expects the warm-started run to keep the stored
    # population -- confirm against TPOT's fit() implementation.
    assert tpot_obj._pop == first_pop
def test_check_dataset_4():
    """Check that _check_dataset raises ValueError for a sample_weight of mismatched length."""
    settings = dict(
        random_state=42,
        population_size=1,
        offspring_size=2,
        generations=1,
        verbosity=0,
        config_dict='TPOT light',
    )
    classifier = TPOTClassifier(**settings)
    classifier._fit_init()

    # range(1, n) has n - 1 items, i.e. one fewer than training_target.
    short_weights = list(range(1, len(training_target)))
    assert_raises(
        ValueError,
        classifier._check_dataset,
        training_features,
        training_target,
        short_weights,
    )
def test_sparse_matrix_5():
    """Check that fit() accepts a sparse matrix when using a custom config file.

    The test passes as long as fit() completes without raising.
    """
    settings = dict(
        random_state=42,
        population_size=1,
        offspring_size=2,
        generations=1,
        verbosity=0,
        # Path to a configuration dictionary file rather than a built-in name.
        config_dict='tests/test_config_sparse.py',
    )
    classifier = TPOTClassifier(**settings)
    classifier.fit(sparse_features, sparse_target)
def test_varOr_3():
    """Check that varOr() with cxpb=0 and mutpb=0 keeps fitness values on every offspring."""
    classifier = TPOTClassifier(
        random_state=42,
        verbosity=0,
        config_dict='TPOT light',
    )
    classifier._fit_init()
    # Silent progress bar assigned to the classifier before calling varOr.
    classifier._pbar = tqdm(total=1, disable=True)

    population = classifier._toolbox.population(n=5)
    # Give every individual a fitness so it counts as already evaluated.
    for individual in population:
        individual.fitness.values = (2, 1.0)

    children = varOr(population, classifier._toolbox, 5, cxpb=0.0, mutpb=0.0)
    unevaluated = [child for child in children if not child.fitness.valid]

    assert len(children) == 5
    assert len(unevaluated) == 0
from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import time
# Example script: fit TPOT on the digits dataset with two worker processes
# and report the held-out score plus the wall-clock time taken.
digits = load_digits()

# Train on 25% of the samples and keep the remaining 75% for scoring.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    train_size=0.25,
    test_size=0.75,
)

# A max_time_mins=1 argument was left disabled in the original call.
tpot = TPOTClassifier(
    generations=3,
    population_size=5,
    offspring_size=10,
    verbosity=3,
    n_jobs=2,
    random_state=44,
)

time_start = time.time()
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
# Elapsed time covers both the fit and the scoring call above.
print('\nTime used with num_cpu = 2:', time.time() - time_start)
def test_init_n_jobs_3():
    """Check that n_jobs=0 is stored by the constructor but rejected by _fit_init."""
    classifier = TPOTClassifier(n_jobs=0)

    # The constructor stores the value unchanged ...
    assert classifier.n_jobs == 0
    # ... and the ValueError is only expected once _fit_init runs.
    assert_raises(ValueError, classifier._fit_init)
def test_init_default_scoring_2():
    """Check that _fit_init raises ValueError when scoring is the balanced_accuracy function."""
    # NOTE(review): balanced_accuracy is defined elsewhere; this test treats it
    # as an invalid sklearn metric function -- confirm against its definition.
    classifier = TPOTClassifier(scoring=balanced_accuracy)
    assert_raises(ValueError, classifier._fit_init)
# Example script: let TPOT search for a classifier on the MAGIC Gamma
# Telescope data and export the resulting pipeline.
telescope = pd.read_csv('MAGIC Gamma Telescope Data.csv')

# Clean the data: shuffle the rows, then rebuild the index.
telescope_shuffle = telescope.iloc[np.random.permutation(len(telescope))]
tele = telescope_shuffle.reset_index(drop=True)

# Store 2 classes: map the 'g' / 'h' labels to 0 / 1.
tele['Class'] = tele['Class'].map({'g': 0, 'h': 1})
tele_class = tele['Class'].values

# Split the row index into training and validation parts, stratified on
# the class label.
training_indices, validation_indices = train_test_split(
    tele.index,
    stratify=tele_class,
    train_size=0.75,
    test_size=0.25,
)
# The original chained assignment also bound `testing_indices` to the same
# held-out split; keep that name available as an alias.
testing_indices = validation_indices

# Let Genetic Programming find the best ML model and hyperparameters.
tpot = TPOTClassifier(generations=5, verbosity=2)
tpot.fit(
    tele.drop('Class', axis=1).loc[training_indices].values,
    # Fixed: the original read `training_indicss`, which raised NameError.
    tele.loc[training_indices, 'Class'].values,
)

# Score the accuracy on the held-out rows (the value is not stored).
tpot.score(
    tele.drop('Class', axis=1).loc[validation_indices].values,
    tele.loc[validation_indices, 'Class'].values,
)

# Export the generated code.
tpot.export('pipeline.py')