Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_EnsembleClassifier_gridsearch_enumerate_names():
    """Grid-search an ensemble that contains the same estimator twice.

    The logistic-regression instance is listed twice in ``clfs``; the
    parameter grid addresses the two entries as ``logisticregression-1``
    and ``logisticregression-2``. The test makes no assertions: it passes
    as long as fitting the grid search does not raise.
    """
    logreg = LogisticRegression(random_state=1)
    forest = RandomForestClassifier(random_state=1)
    ensemble = EnsembleClassifier(clfs=[logreg, logreg, forest], voting='soft')

    param_grid = {
        'logisticregression-1__C': [1.0, 100.0],
        'logisticregression-2__C': [1.0, 100.0],
        'randomforestclassifier__n_estimators': [5, 20],
    }

    search = GridSearchCV(estimator=ensemble, param_grid=param_grid, cv=5)
    search.fit(iris.data, iris.target)
def test_EnsembleClassifier_gridsearch():
    """Grid-search a soft-voting ensemble and check the rounded mean scores."""
    logreg = LogisticRegression(random_state=1)
    forest = RandomForestClassifier(random_state=1)
    naive_bayes = GaussianNB()
    ensemble = EnsembleClassifier(
        clfs=[logreg, forest, naive_bayes],
        voting='soft',
    )

    param_grid = {
        'logisticregression__C': [1.0, 100.0],
        'randomforestclassifier__n_estimators': [20, 200],
    }
    search = GridSearchCV(estimator=ensemble, param_grid=param_grid, cv=5)
    search.fit(iris.data, iris.target)

    # Each grid_scores_ entry unpacks into three fields; only the middle
    # one (the mean score) is compared, rounded to two decimals.
    rounded_means = [
        round(entry_mean, 2) for _, entry_mean, _ in search.grid_scores_
    ]
    assert rounded_means == [0.95, 0.96, 0.96, 0.95]
def test_EnsembleClassifier_weights():
    """Cross-validate a weighted soft-voting ensemble; expect 0.93 accuracy."""
    # Seed NumPy's global RNG before the unseeded estimators are built.
    np.random.seed(123)

    members = [LogisticRegression(), RandomForestClassifier(), GaussianNB()]
    ensemble = EnsembleClassifier(
        clfs=members,
        voting='soft',
        weights=[1, 2, 10],
    )

    fold_scores = cross_validation.cross_val_score(
        ensemble, X, y, cv=5, scoring='accuracy'
    )
    assert round(fold_scores.mean(), 2) == 0.93
def test_EnsembleClassifier():
    """Cross-validate a hard-voting ensemble; expect 0.94 mean accuracy."""
    # Seed NumPy's global RNG before the unseeded estimators are built.
    np.random.seed(123)

    members = [LogisticRegression(), RandomForestClassifier(), GaussianNB()]
    ensemble = EnsembleClassifier(clfs=members, voting='hard')

    fold_scores = cross_validation.cross_val_score(
        ensemble, X, y, cv=5, scoring='accuracy'
    )
    assert round(fold_scores.mean(), 2) == 0.94
else:
predictor_dump_path = sys.argv[2]
else:
predictor_dump_path = None
### Configuration ###

# We use NeuroPred's dataset for training/validation of our predictors.
project_paths.dataset_name = 'neuropred'

# Base estimators that the voting ensemble below combines.
ensemble_classifiers = [
    LogisticRegressionCV(
        Cs=16,
        n_jobs=-2,
        class_weight='auto',
    ),
    RandomForestClassifier(
        n_estimators=250,
        bootstrap=True,
        criterion='gini',
        n_jobs=-2,
        class_weight='auto',
    ),
    SVC(
        kernel='rbf',
        C=3.798,
        probability=True,
        cache_size=2400,
        class_weight='auto',
    ),
]

# A single hard-voting ensemble over the estimators above.
classifiers = [
    EnsembleClassifier(clfs=ensemble_classifiers, voting='hard'),
]

# Feature selection: a variance threshold followed by an FDR-based selector.
feature_selector = FeatureSelectionPipeline([
    VarianceThreshold(0.03),
    SelectFdr(alpha=0.1),
])

### Train the classifier and dump the predictor ###

# Module-level file handles; both start out unset.
windows_file = None
predictor_dump_file = None
def open_files():
    """Open the window-features file for reading and the dump file for writing.

    Both handles are stored in the module-level ``windows_file`` and
    ``predictor_dump_file`` variables rather than returned.
    """
    global windows_file, predictor_dump_file

    features_path = project_paths.get_window_features_file_path(advanced)
    windows_file = open(features_path, 'rb')
    predictor_dump_file = util.open_file(predictor_dump_path, 'wb')