model = build_model(SHAPE, nb_classes, bn_axis)
model.compile(optimizer=Adam(lr=1.0e-4),
              loss='categorical_crossentropy', metrics=['accuracy'])
# Fit the model
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs)
# Save the model to an HDF5 file
model.save('{}epochs_{}batch_vgg16_model_{}.h5'.format(
    epochs, batch_size, data_directory.replace("/", "_")), overwrite=True)
# del model # deletes the existing model
predicted = model.predict(X_test)
y_pred = np.argmax(predicted, axis=1)
Y_test = np.argmax(Y_test, axis=1)
cm = confusion_matrix(Y_test, y_pred)
report = classification_report(Y_test, y_pred)
tn = cm[0][0]
fn = cm[1][0]
tp = cm[1][1]
fp = cm[0][1]
# Avoid division by zero below when a confusion-matrix cell is empty
if tp == 0:
    tp = 1
if tn == 0:
    tn = 1
if fp == 0:
    fp = 1
if fn == 0:
    fn = 1
TPR = float(tp)/(float(tp)+float(fn))
FPR = float(fp)/(float(fp)+float(tn))
accuracy = round((float(tp) + float(tn))/(float(tp) + float(tn) + float(fp) + float(fn)), 2)
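# The manual cell indexing above can also be written with the idiomatic sklearn
# unpacking shown in this sketch; it assumes a binary problem, so
# confusion_matrix(...).ravel() yields exactly four counts in (tn, fp, fn, tp) order.
from sklearn.metrics import confusion_matrix

tn, fp, fn, tp = confusion_matrix(Y_test, y_pred).ravel()
TPR = tp / (tp + fn) if (tp + fn) else 0.0  # sensitivity / recall
FPR = fp / (fp + tn) if (fp + tn) else 0.0  # fall-out
accuracy = (tp + tn) / (tp + tn + fp + fn)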
vader_feat = VaderFeatureExtractor(tokenizer)
liu_feat = LiuFeatureExtractor(tokenizer)
log_mod = LogisticRegression(solver='liblinear', multi_class='ovr')
ngram_lex_clf = Pipeline([
    ('feats', FeatureUnion([('ngram', vectorizer), ('vader', vader_feat), ('liu', liu_feat)])),
    ('clf', log_mod)])
ngram_lex_clf.fit(train_data.tweet, train_data.sent)
pred_ngram_lex = ngram_lex_clf.predict(test_data.tweet)
conf_ngram_lex = confusion_matrix(test_data.sent, pred_ngram_lex)
kappa_ngram_lex = cohen_kappa_score(test_data.sent, pred_ngram_lex)
class_rep = classification_report(test_data.sent, pred_ngram_lex)
print('Confusion Matrix for Logistic Regression + ngrams + features from Bing Liu\'s Lexicon and the Vader method')
print(conf_ngram_lex)
print('Classification Report')
print(class_rep)
print('kappa:'+str(kappa_ngram_lex))
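# VaderFeatureExtractor and LiuFeatureExtractor are project-specific classes. A
# minimal sketch of what a FeatureUnion-compatible extractor can look like,
# assuming the vaderSentiment package is installed; the class name and the
# single compound-score feature are illustrative, not the original implementation.
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

class VaderCompoundExtractor(BaseEstimator, TransformerMixin):
    """Map each document to a single VADER compound-sentiment feature."""

    def __init__(self):
        self.analyzer = SentimentIntensityAnalyzer()

    def fit(self, X, y=None):
        return self  # stateless: nothing to learn

    def transform(self, X):
        scores = [self.analyzer.polarity_scores(doc)['compound'] for doc in X]
        return np.array(scores).reshape(-1, 1)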
# Building deep neural network
clf = MLPClassifier(solver='lbfgs',
                    alpha=1e-5,
                    hidden_layer_sizes=(5, 2),
                    random_state=1)
print(clf)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print("accuracy_score:")
print(metrics.accuracy_score(y_test, y_pred))
print("recall_score:")
print(metrics.recall_score(y_test, y_pred))
print("precision_score:")
print(metrics.precision_score(y_test, y_pred))
print(metrics.confusion_matrix(y_test, y_pred))
joblib.dump(clf, mode_file)
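# A short usage sketch: the classifier persisted above can be restored later with
# joblib.load and reused for prediction; `x_new` is an illustrative placeholder
# for unseen feature rows, not a variable from the original code.
import joblib

restored_clf = joblib.load(mode_file)
new_predictions = restored_clf.predict(x_new)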
from sklearn import metrics

def train_and_evaluate(clf, X_train, X_test, y_train, y_test):
    clf.fit(X_train, y_train)
    print("Accuracy on training set:")
    print(clf.score(X_train, y_train))
    print("Accuracy on testing set:")
    print(clf.score(X_test, y_test))
    y_pred = clf.predict(X_test)
    print("Classification Report:")
    print(metrics.classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(metrics.confusion_matrix(y_test, y_pred))
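# Usage sketch for train_and_evaluate, assuming scikit-learn is available; the
# iris data, the 80/20 split and the DecisionTreeClassifier are illustrative
# choices, not part of the original code.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=1)
train_and_evaluate(DecisionTreeClassifier(random_state=1), X_tr, X_te, y_tr, y_te)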
cv.fit(training_x, training_y)
reg_table = pd.DataFrame(cv.cv_results_)
reg_table.to_csv('{}/{}_{}_reg.csv'.format(OUTPUT_DIRECTORY, clf_type, dataset), index=False)
test_score = cv.score(test_x, test_y)
# TODO: Ensure this is an estimator that can handle this?
best_estimator = cv.best_estimator_.fit(training_x, training_y)
final_estimator = best_estimator._final_estimator
grid_best_params = pd.DataFrame([final_estimator.get_params()])
grid_best_params.to_csv('{}/{}_{}_best_params.csv'.format(OUTPUT_DIRECTORY, clf_type, dataset), index=False)
logger.info(" - Grid search complete")
final_estimator.write_visualization('{}/images/{}_{}_LC'.format(OUTPUT_DIRECTORY, clf_type, dataset))
test_y_predicted = cv.predict(test_x)
cnf_matrix = confusion_matrix(test_y, test_y_predicted)
np.set_printoptions(precision=2)
plt = plot_confusion_matrix(cnf_matrix, classes,
                            title='Confusion Matrix: {} - {}'.format(clf_type, dataset_readable_name))
plt.savefig('{}/images/{}_{}_CM.png'.format(OUTPUT_DIRECTORY, clf_type, dataset), format='png', dpi=150,
            bbox_inches='tight')
plt = plot_confusion_matrix(cnf_matrix, classes, normalize=True,
                            title='Normalized Confusion Matrix: {} - {}'.format(clf_type, dataset_readable_name))
plt.savefig('{}/images/{}_{}_NCM.png'.format(OUTPUT_DIRECTORY, clf_type, dataset), format='png', dpi=150,
            bbox_inches='tight')
logger.info(" - Visualization complete")
with open('{}/test results.csv'.format(OUTPUT_DIRECTORY), 'a') as f:
    ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
    f.write('"{}",{},{},{},"{}"\n'.format(ts, clf_type, dataset, test_score, cv.best_params_))
def cross_lang_testing_classification(train_labels, train_data, test_labels, test_data):
    uni_to_tri_vectorizer = CountVectorizer(analyzer="word", tokenizer=None, preprocessor=None,
                                            stop_words=None, ngram_range=(1, 3), min_df=10, max_features=500)
    vectorizers = [uni_to_tri_vectorizer]
    # GradientBoostingClassifier() is left out: gradient boosting needs a dense array, so testing
    # fails for it. The pipeline should be modified later to account for this (see the dense-array
    # sketch after this function). Discussion of the sparseness issue:
    # https://stackoverflow.com/questions/28384680/scikit-learns-pipeline-a-sparse-matrix-was-passed-but-dense-data-is-required
    classifiers = [RandomForestClassifier()]
    for vectorizer in vectorizers:
        for classifier in classifiers:
            print("Printing results for: " + str(classifier) + str(vectorizer))
            text_clf = Pipeline([('vect', vectorizer), ('clf', classifier)])
            text_clf.fit(train_data, train_labels)
            predicted = text_clf.predict(test_data)
            print(vectorizer.get_feature_names())
            print(np.mean(predicted == test_labels, dtype=float))
            print(confusion_matrix(test_labels, predicted, labels=["A1", "A2", "B1", "B2", "C1", "C2"]))
    print("CROSS LANG EVAL DONE")
f.write(rgb_dir + ' ' + depth_dir + ' ' + ir_dir + ' ' + str(preds[i_batch, 1]) + '\n')
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
    line = 'Test: [{0}/{1}]\t' \
           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
           'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
           'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format(i, len(val_loader), batch_time=batch_time,
                                                             loss=losses, top1=top1)
    with open('logs/{}_{}.log'.format(time_stp, args.arch), 'a+') as flog:
        flog.write('{}\n'.format(line))
    print(line)
tn, fp, fn, tp = confusion_matrix(label_list, predicted_list).ravel()
apcer = fp/(tn + fp)
npcer = fn/(fn + tp)
acer = (apcer + npcer)/2
metric = roc.cal_metric(label_list, result_list)
eer = metric[0]
tprs = metric[1]
auc = metric[2]
xy_dic = metric[3]
# tpr1 = tprs['TPR@FPR=10E-2']
# logger.info('eer: {}\t'
# 'tpr1: {}\t'
# 'auc: {}\t'
# 'acer: {}\t'
# 'accuracy: {top1.avg:.3f} ({top1.avg:.3f})'
# .format(eer,tpr1,auc,acer,top1=top1))
# pickle.dump(xy_dic, open('xys/xy_{}_{}_{}.pickle'.format(time_stp, args.arch,epoch),'wb'))
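# The anti-spoofing error rates above (APCER, NPCER, ACER) can be wrapped in a
# small helper. This sketch only restates the formulas already used above and
# assumes the same binary label/prediction encoding; the function name is illustrative.
from sklearn.metrics import confusion_matrix

def spoofing_error_rates(labels, predictions):
    tn, fp, fn, tp = confusion_matrix(labels, predictions).ravel()
    apcer = fp / (tn + fp)        # same formula as above
    npcer = fn / (fn + tp)        # same formula as above
    acer = (apcer + npcer) / 2    # average of the two error rates
    return apcer, npcer, acer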
def plot_confusion_matrix(model, loader):
    # Predict the values from the validation dataset
    model.eval()
    with torch.no_grad():  # inference only, so gradient tracking is unnecessary
        model_output = torch.cat([model(x) for x, _ in loader])
    predictions = torch.argmax(model_output, dim=1)
    targets = torch.cat([y for _, y in loader])
    conf_matrix = confusion_matrix(targets, predictions)
    df_cm = pd.DataFrame(conf_matrix)
    sn.set(font_scale=1)
    sn.heatmap(df_cm, annot=True, annot_kws={"size": 16})
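# Usage sketch, assuming a trained torch model and a DataLoader yielding (x, y)
# batches on the CPU; matplotlib is imported only to display the seaborn heatmap.
import matplotlib.pyplot as plt

plot_confusion_matrix(model, val_loader)
plt.show()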
prediction = np.array(predicted)
#### Leftover code for a leave-one-out crossval. Can probably be safely deleted
if useLeaveOneOut is True:
    if firstIterCV is True:
        probabilities = np.append(probabilities, probs, axis=1)
        firstIterCV = False
        predictions = np.append(predictions, prediction)
    else:
        probabilities = np.append(probabilities, probs, axis=0)
        predictions = np.append(predictions, prediction)
else:
    predictions = np.append(predictions, prediction)
    probabilities = np.append(probabilities, probs)
if useLeaveOneOut is not True:
    cm = np.array(confusion_matrix(test_label, prediction))
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    confusion_matrixes.append(cm)
    confusion_matrixes_percent.append(cm_normalized)
    avg_confusion_matrixes = np.mean(confusion_matrixes_percent, axis=0)
if verbose is True:
    print('CV #' + str(count))
    print('Prediction: ' + str(prediction))
    print(' Actual: ' + str(test_label))
# Append probs to the global list
probs_np = np.array(probs)
cv_probabilities.append(probs_np[:, 0])
cv_probabilities_label.append(test_label)
# if useLeaveOneOut is not True:
# print('Confusion matrix')
def plot_confusion_matrix(cls_pred):
    '''
    @param cls_pred: predicted class labels for the test set, whose true labels are known
    '''
    cls_true = data.test.cls
    cm = confusion_matrix(cls_true, cls_pred)
    plt.matshow(cm)
    plt.colorbar()
    tick_marks = np.arange(num_classes)
    plt.xticks(tick_marks, range(num_classes))
    plt.yticks(tick_marks, range(num_classes))
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()
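# Usage sketch for the helper above: `data.test.cls` and `num_classes` come from
# the surrounding (unseen) script, and how predictions are produced depends on
# that script's model API, so the argmax call here is purely illustrative.
cls_pred = np.argmax(model.predict(data.test.images), axis=1)  # hypothetical model / data attributes
plot_confusion_matrix(cls_pred)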